resolve conflict
This commit is contained in:
12
Makefile
12
Makefile
@@ -4,6 +4,7 @@ LIBSUFFIX=a
|
|||||||
CP=cp
|
CP=cp
|
||||||
ROOTDIR=$(PWD)
|
ROOTDIR=$(PWD)
|
||||||
|
|
||||||
|
|
||||||
ifeq (,$(wildcard ./gtest))
|
ifeq (,$(wildcard ./gtest))
|
||||||
HAVE_GTEST=No
|
HAVE_GTEST=No
|
||||||
else
|
else
|
||||||
@@ -13,20 +14,22 @@ endif
|
|||||||
# Configurations
|
# Configurations
|
||||||
ifeq ($(BUILDTYPE), Release)
|
ifeq ($(BUILDTYPE), Release)
|
||||||
CFLAGS += -O3
|
CFLAGS += -O3
|
||||||
ifneq ($(ENABLE64BIT), Yes)
|
|
||||||
USE_ASM = Yes
|
USE_ASM = Yes
|
||||||
endif
|
|
||||||
else
|
else
|
||||||
CFLAGS = -g
|
CFLAGS = -g
|
||||||
USE_ASM = No
|
USE_ASM = No
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(ENABLE64BIT), Yes)
|
ifeq ($(ENABLE64BIT), Yes)
|
||||||
CFLAGS += -m64
|
CFLAGS += -m64
|
||||||
LDFLAGS += -m64
|
LDFLAGS += -m64
|
||||||
|
ASMFLAGS += -DUNIX64
|
||||||
else
|
else
|
||||||
CFLAGS += -m32
|
CFLAGS += -m32
|
||||||
LDFLAGS += -m32
|
LDFLAGS += -m32
|
||||||
|
ASMFLAGS += -DX86_32
|
||||||
endif
|
endif
|
||||||
|
|
||||||
include build/platform-$(UNAME).mk
|
include build/platform-$(UNAME).mk
|
||||||
|
|
||||||
ifeq ($(USE_ASM),Yes)
|
ifeq ($(USE_ASM),Yes)
|
||||||
@@ -40,7 +43,8 @@ ASMFLAGS += -DNO_DYNAMIC_VP
|
|||||||
|
|
||||||
#### No user-serviceable parts below this line
|
#### No user-serviceable parts below this line
|
||||||
INCLUDES = -Icodec/api/svc -Icodec/common -Igtest/include
|
INCLUDES = -Icodec/api/svc -Icodec/common -Igtest/include
|
||||||
ASM_INCLUDES = -Iprocessing/src/asm/
|
#ASM_INCLUDES = -Iprocessing/src/asm/
|
||||||
|
ASM_INCLUDES = -Icodec/common/
|
||||||
|
|
||||||
COMMON_INCLUDES = \
|
COMMON_INCLUDES = \
|
||||||
-Icodec/decoder/core/inc
|
-Icodec/decoder/core/inc
|
||||||
@@ -83,7 +87,7 @@ test:
|
|||||||
include codec/common/targets.mk
|
include codec/common/targets.mk
|
||||||
include codec/decoder/targets.mk
|
include codec/decoder/targets.mk
|
||||||
include codec/encoder/targets.mk
|
include codec/encoder/targets.mk
|
||||||
include processing/targets.mk
|
include codec/processing/targets.mk
|
||||||
include codec/console/dec/targets.mk
|
include codec/console/dec/targets.mk
|
||||||
include codec/console/enc/targets.mk
|
include codec/console/enc/targets.mk
|
||||||
|
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
(cd codec/decoder; python ../../build/mktargets.py --directory codec/decoder --library decoder --exclude StdAfx.cpp)
|
(cd codec/decoder; python ../../build/mktargets.py --directory codec/decoder --library decoder --exclude StdAfx.cpp)
|
||||||
(cd codec/encoder; python ../../build/mktargets.py --directory codec/encoder --library encoder --exclude DllEntry.cpp)
|
(cd codec/encoder; python ../../build/mktargets.py --directory codec/encoder --library encoder --exclude DllEntry.cpp)
|
||||||
(cd codec/common; python ../../build/mktargets.py --directory codec/common --library common)
|
(cd codec/common; python ../../build/mktargets.py --directory codec/common --library common)
|
||||||
(cd processing; python ../build/mktargets.py --directory processing --library processing --exclude wels_process.cpp --exclude WelsVideoProcessor.cpp)
|
(cd codec/processing; python ../../build/mktargets.py --directory codec/processing --library processing --exclude wels_process.cpp --exclude WelsVideoProcessor.cpp)
|
||||||
|
|
||||||
(cd codec/console/dec; python ../../../build/mktargets.py --directory codec/console/dec --binary h264dec --exclude dec_console.h --exclude load_bundle_functions.cpp)
|
(cd codec/console/dec; python ../../../build/mktargets.py --directory codec/console/dec --binary h264dec --exclude dec_console.h --exclude load_bundle_functions.cpp)
|
||||||
(cd codec/console/enc; python ../../../build/mktargets.py --directory codec/console/enc --binary h264enc --exclude enc_console.h --exclude bundlewelsenc.cpp)
|
(cd codec/console/enc; python ../../../build/mktargets.py --directory codec/console/enc --binary h264enc --exclude enc_console.h --exclude bundlewelsenc.cpp)
|
||||||
|
|||||||
@@ -1,5 +1,11 @@
|
|||||||
USE_ASM = No # We don't have ASM working on Mac yet
|
|
||||||
ASM = nasm
|
ASM = nasm
|
||||||
CFLAGS += -Werror -fPIC
|
CFLAGS += -Werror -fPIC
|
||||||
LDFLAGS += -lpthread
|
LDFLAGS += -lpthread
|
||||||
ASMFLAGS += -f macho --prefix _ -DNOPREFIX
|
ASMFLAGS += --prefix _ -DNOPREFIX
|
||||||
|
ifeq ($(ENABLE64BIT), Yes)
|
||||||
|
ASMFLAGS += -f macho64
|
||||||
|
else
|
||||||
|
ASMFLAGS += -f macho
|
||||||
|
endif
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,10 @@
|
|||||||
ASM = nasm
|
ASM = nasm
|
||||||
CFLAGS += -Werror -fPIC -DLINUX -D__NO_CTYPE
|
CFLAGS += -Werror -fPIC -DLINUX -D__NO_CTYPE
|
||||||
LDFLAGS += -lpthread
|
LDFLAGS += -lpthread
|
||||||
ASMFLAGS += -f elf -DNOPREFIX
|
ASMFLAGS += -DNOPREFIX
|
||||||
|
ifeq ($(ENABLE64BIT), Yes)
|
||||||
|
ASMFLAGS += -f elf64
|
||||||
|
else
|
||||||
|
ASMFLAGS += -f elf32
|
||||||
|
endif
|
||||||
|
|
||||||
|
|||||||
@@ -348,44 +348,6 @@
|
|||||||
Name="asm"
|
Name="asm"
|
||||||
Filter="*.asm;*.inc"
|
Filter="*.asm;*.inc"
|
||||||
>
|
>
|
||||||
<File
|
|
||||||
RelativePath="..\..\..\decoder\core\asm\asm_inc.asm"
|
|
||||||
>
|
|
||||||
<FileConfiguration
|
|
||||||
Name="Release|Win32"
|
|
||||||
>
|
|
||||||
<Tool
|
|
||||||
Name="VCCustomBuildTool"
|
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
|
||||||
/>
|
|
||||||
</FileConfiguration>
|
|
||||||
<FileConfiguration
|
|
||||||
Name="Release|x64"
|
|
||||||
ExcludedFromBuild="true"
|
|
||||||
>
|
|
||||||
<Tool
|
|
||||||
Name="VCCustomBuildTool"
|
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
|
||||||
/>
|
|
||||||
</FileConfiguration>
|
|
||||||
<FileConfiguration
|
|
||||||
Name="Debug|Win32"
|
|
||||||
>
|
|
||||||
<Tool
|
|
||||||
Name="VCCustomBuildTool"
|
|
||||||
/>
|
|
||||||
</FileConfiguration>
|
|
||||||
<FileConfiguration
|
|
||||||
Name="Debug|x64"
|
|
||||||
ExcludedFromBuild="true"
|
|
||||||
>
|
|
||||||
<Tool
|
|
||||||
Name="VCCustomBuildTool"
|
|
||||||
/>
|
|
||||||
</FileConfiguration>
|
|
||||||
</File>
|
|
||||||
<File
|
<File
|
||||||
RelativePath="..\..\..\decoder\core\asm\block_add.asm"
|
RelativePath="..\..\..\decoder\core\asm\block_add.asm"
|
||||||
>
|
>
|
||||||
@@ -394,17 +356,16 @@
|
|||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
<FileConfiguration
|
<FileConfiguration
|
||||||
Name="Release|x64"
|
Name="Release|x64"
|
||||||
ExcludedFromBuild="true"
|
|
||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
@@ -413,40 +374,38 @@
|
|||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
<FileConfiguration
|
<FileConfiguration
|
||||||
Name="Debug|x64"
|
Name="Debug|x64"
|
||||||
ExcludedFromBuild="true"
|
|
||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
</File>
|
</File>
|
||||||
<File
|
<File
|
||||||
RelativePath="..\..\..\decoder\core\asm\cpuid.asm"
|
RelativePath="..\..\..\common\cpuid.asm"
|
||||||
>
|
>
|
||||||
<FileConfiguration
|
<FileConfiguration
|
||||||
Name="Release|Win32"
|
Name="Release|Win32"
|
||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
<FileConfiguration
|
<FileConfiguration
|
||||||
Name="Release|x64"
|
Name="Release|x64"
|
||||||
ExcludedFromBuild="true"
|
|
||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
@@ -455,17 +414,16 @@
|
|||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
<FileConfiguration
|
<FileConfiguration
|
||||||
Name="Debug|x64"
|
Name="Debug|x64"
|
||||||
ExcludedFromBuild="true"
|
|
||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
@@ -478,17 +436,16 @@
|
|||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
<FileConfiguration
|
<FileConfiguration
|
||||||
Name="Release|x64"
|
Name="Release|x64"
|
||||||
ExcludedFromBuild="true"
|
|
||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
@@ -497,40 +454,38 @@
|
|||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
<FileConfiguration
|
<FileConfiguration
|
||||||
Name="Debug|x64"
|
Name="Debug|x64"
|
||||||
ExcludedFromBuild="true"
|
|
||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
</File>
|
</File>
|
||||||
<File
|
<File
|
||||||
RelativePath="..\..\..\decoder\core\asm\deblock.asm"
|
RelativePath="..\..\..\common\deblock.asm"
|
||||||
>
|
>
|
||||||
<FileConfiguration
|
<FileConfiguration
|
||||||
Name="Release|Win32"
|
Name="Release|Win32"
|
||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
<FileConfiguration
|
<FileConfiguration
|
||||||
Name="Release|x64"
|
Name="Release|x64"
|
||||||
ExcludedFromBuild="true"
|
|
||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
@@ -539,40 +494,38 @@
|
|||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
<FileConfiguration
|
<FileConfiguration
|
||||||
Name="Debug|x64"
|
Name="Debug|x64"
|
||||||
ExcludedFromBuild="true"
|
|
||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
</File>
|
</File>
|
||||||
<File
|
<File
|
||||||
RelativePath="..\..\..\decoder\core\asm\expand_picture.asm"
|
RelativePath="..\..\..\common\expand_picture.asm"
|
||||||
>
|
>
|
||||||
<FileConfiguration
|
<FileConfiguration
|
||||||
Name="Release|Win32"
|
Name="Release|Win32"
|
||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
<FileConfiguration
|
<FileConfiguration
|
||||||
Name="Release|x64"
|
Name="Release|x64"
|
||||||
ExcludedFromBuild="true"
|
|
||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
@@ -581,17 +534,16 @@
|
|||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
<FileConfiguration
|
<FileConfiguration
|
||||||
Name="Debug|x64"
|
Name="Debug|x64"
|
||||||
ExcludedFromBuild="true"
|
|
||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
@@ -604,17 +556,16 @@
|
|||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
<FileConfiguration
|
<FileConfiguration
|
||||||
Name="Release|x64"
|
Name="Release|x64"
|
||||||
ExcludedFromBuild="true"
|
|
||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
@@ -623,40 +574,38 @@
|
|||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
<FileConfiguration
|
<FileConfiguration
|
||||||
Name="Debug|x64"
|
Name="Debug|x64"
|
||||||
ExcludedFromBuild="true"
|
|
||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
</File>
|
</File>
|
||||||
<File
|
<File
|
||||||
RelativePath="..\..\..\decoder\core\asm\mb_copy.asm"
|
RelativePath="..\..\..\common\mb_copy.asm"
|
||||||
>
|
>
|
||||||
<FileConfiguration
|
<FileConfiguration
|
||||||
Name="Release|Win32"
|
Name="Release|Win32"
|
||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
<FileConfiguration
|
<FileConfiguration
|
||||||
Name="Release|x64"
|
Name="Release|x64"
|
||||||
ExcludedFromBuild="true"
|
|
||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
@@ -665,40 +614,38 @@
|
|||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
<FileConfiguration
|
<FileConfiguration
|
||||||
Name="Debug|x64"
|
Name="Debug|x64"
|
||||||
ExcludedFromBuild="true"
|
|
||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
</File>
|
</File>
|
||||||
<File
|
<File
|
||||||
RelativePath="..\..\..\decoder\core\asm\mc_chroma.asm"
|
RelativePath="..\..\..\common\mc_chroma.asm"
|
||||||
>
|
>
|
||||||
<FileConfiguration
|
<FileConfiguration
|
||||||
Name="Release|Win32"
|
Name="Release|Win32"
|
||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
<FileConfiguration
|
<FileConfiguration
|
||||||
Name="Release|x64"
|
Name="Release|x64"
|
||||||
ExcludedFromBuild="true"
|
|
||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
@@ -707,40 +654,38 @@
|
|||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
<FileConfiguration
|
<FileConfiguration
|
||||||
Name="Debug|x64"
|
Name="Debug|x64"
|
||||||
ExcludedFromBuild="true"
|
|
||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
</File>
|
</File>
|
||||||
<File
|
<File
|
||||||
RelativePath="..\..\..\decoder\core\asm\mc_luma.asm"
|
RelativePath="..\..\..\common\mc_luma.asm"
|
||||||
>
|
>
|
||||||
<FileConfiguration
|
<FileConfiguration
|
||||||
Name="Release|Win32"
|
Name="Release|Win32"
|
||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
<FileConfiguration
|
<FileConfiguration
|
||||||
Name="Release|x64"
|
Name="Release|x64"
|
||||||
ExcludedFromBuild="true"
|
|
||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
@@ -749,59 +694,16 @@
|
|||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
<FileConfiguration
|
<FileConfiguration
|
||||||
Name="Debug|x64"
|
Name="Debug|x64"
|
||||||
ExcludedFromBuild="true"
|
|
||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
|
||||||
/>
|
|
||||||
</FileConfiguration>
|
|
||||||
</File>
|
|
||||||
<File
|
|
||||||
RelativePath="..\..\..\decoder\core\asm\memzero.asm"
|
|
||||||
>
|
|
||||||
<FileConfiguration
|
|
||||||
Name="Release|Win32"
|
|
||||||
>
|
|
||||||
<Tool
|
|
||||||
Name="VCCustomBuildTool"
|
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
|
||||||
/>
|
|
||||||
</FileConfiguration>
|
|
||||||
<FileConfiguration
|
|
||||||
Name="Release|x64"
|
|
||||||
ExcludedFromBuild="true"
|
|
||||||
>
|
|
||||||
<Tool
|
|
||||||
Name="VCCustomBuildTool"
|
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -O3 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
|
||||||
/>
|
|
||||||
</FileConfiguration>
|
|
||||||
<FileConfiguration
|
|
||||||
Name="Debug|Win32"
|
|
||||||
>
|
|
||||||
<Tool
|
|
||||||
Name="VCCustomBuildTool"
|
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
|
||||||
/>
|
|
||||||
</FileConfiguration>
|
|
||||||
<FileConfiguration
|
|
||||||
Name="Debug|x64"
|
|
||||||
ExcludedFromBuild="true"
|
|
||||||
>
|
|
||||||
<Tool
|
|
||||||
Name="VCCustomBuildTool"
|
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
|
|||||||
@@ -94,8 +94,8 @@
|
|||||||
<ClCompile>
|
<ClCompile>
|
||||||
<Optimization>MaxSpeed</Optimization>
|
<Optimization>MaxSpeed</Optimization>
|
||||||
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
|
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
|
||||||
<AdditionalIncludeDirectories>..\..\..\decoder\core\inc;..\..\..\common\inc;..\..\..\api\svc;..\..\..\hwDecoder\core\inc;..\..\..\hwDecoder\dxva\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
<AdditionalIncludeDirectories>..\..\..\decoder\core\inc;..\..\..\common;..\..\..\api\svc;..\..\..\hwDecoder\core\inc;..\..\..\hwDecoder\dxva\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||||
<PreprocessorDefinitions>WIN32;NDEBUG;_LIB;HAVE_CACHE_LINE_ALIGN;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
<PreprocessorDefinitions>WIN32;NDEBUG;X86_ASM;_LIB;HAVE_CACHE_LINE_ALIGN;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
<StringPooling>true</StringPooling>
|
<StringPooling>true</StringPooling>
|
||||||
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
|
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
|
||||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||||
@@ -125,8 +125,8 @@
|
|||||||
<ClCompile>
|
<ClCompile>
|
||||||
<Optimization>MaxSpeed</Optimization>
|
<Optimization>MaxSpeed</Optimization>
|
||||||
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
|
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
|
||||||
<AdditionalIncludeDirectories>..\..\..\decoder\core\inc;..\..\..\common\inc;..\..\..\api\svc;..\..\..\hwDecoder\core\inc;..\..\..\hwDecoder\dxva\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
<AdditionalIncludeDirectories>..\..\..\decoder\core\inc;..\..\..\common;..\..\..\api\svc;..\..\..\hwDecoder\core\inc;..\..\..\hwDecoder\dxva\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||||
<PreprocessorDefinitions>WIN64;NDEBUG;_LIB;HAVE_CACHE_LINE_ALIGN;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
<PreprocessorDefinitions>WIN64;NDEBUG;X86_ASM;_LIB;HAVE_CACHE_LINE_ALIGN;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
<StringPooling>true</StringPooling>
|
<StringPooling>true</StringPooling>
|
||||||
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
|
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
|
||||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||||
@@ -151,11 +151,15 @@
|
|||||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||||
<OutputFile>$(OutDir)\WelsDecCore.bsc</OutputFile>
|
<OutputFile>$(OutDir)\WelsDecCore.bsc</OutputFile>
|
||||||
</Bscmake>
|
</Bscmake>
|
||||||
|
<CustomBuild>
|
||||||
|
<Outputs>$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command>nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
</CustomBuild>
|
||||||
</ItemDefinitionGroup>
|
</ItemDefinitionGroup>
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||||
<ClCompile>
|
<ClCompile>
|
||||||
<Optimization>Disabled</Optimization>
|
<Optimization>Disabled</Optimization>
|
||||||
<AdditionalIncludeDirectories>..\..\..\decoder\core\inc;..\..\..\common\inc;..\..\..\api\svc;..\..\..\hwDecoder\core\inc;..\..\..\hwDecoder\dxva\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
<AdditionalIncludeDirectories>..\..\..\decoder\core\inc;..\..\..\common;..\..\..\api\svc;..\..\..\hwDecoder\core\inc;..\..\..\hwDecoder\dxva\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||||
<PreprocessorDefinitions>WIN32;_DEBUG;_LIB;X86_ASM;HAVE_CACHE_LINE_ALIGN;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
<PreprocessorDefinitions>WIN32;_DEBUG;_LIB;X86_ASM;HAVE_CACHE_LINE_ALIGN;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
<MinimalRebuild>true</MinimalRebuild>
|
<MinimalRebuild>true</MinimalRebuild>
|
||||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||||
@@ -184,7 +188,7 @@
|
|||||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||||
<ClCompile>
|
<ClCompile>
|
||||||
<Optimization>Disabled</Optimization>
|
<Optimization>Disabled</Optimization>
|
||||||
<AdditionalIncludeDirectories>..\..\..\decoder\core\inc;..\..\..\common\inc;..\..\..\api\svc;..\..\..\hwDecoder\core\inc;..\..\..\hwDecoder\dxva\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
<AdditionalIncludeDirectories>..\..\..\decoder\core\inc;..\..\..\common;..\..\..\api\svc;..\..\..\hwDecoder\core\inc;..\..\..\hwDecoder\dxva\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||||
<PreprocessorDefinitions>WIN64;_DEBUG;_LIB;HAVE_CACHE_LINE_ALIGN;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
<PreprocessorDefinitions>WIN64;_DEBUG;_LIB;HAVE_CACHE_LINE_ALIGN;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||||
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
|
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
|
||||||
@@ -208,176 +212,45 @@
|
|||||||
<SuppressStartupBanner>true</SuppressStartupBanner>
|
<SuppressStartupBanner>true</SuppressStartupBanner>
|
||||||
<OutputFile>$(OutDir)\WelsDecCore.bsc</OutputFile>
|
<OutputFile>$(OutDir)\WelsDecCore.bsc</OutputFile>
|
||||||
</Bscmake>
|
</Bscmake>
|
||||||
|
<CustomBuild>
|
||||||
|
<Command>nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs>$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
</CustomBuild>
|
||||||
</ItemDefinitionGroup>
|
</ItemDefinitionGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<CustomBuild Include="..\..\..\decoder\core\asm\asm_inc.asm">
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
|
||||||
</CustomBuild>
|
|
||||||
<CustomBuild Include="..\..\..\decoder\core\asm\block_add.asm">
|
<CustomBuild Include="..\..\..\decoder\core\asm\block_add.asm">
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
|
||||||
</CustomBuild>
|
|
||||||
<CustomBuild Include="..\..\..\decoder\core\asm\cpuid.asm">
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
<CustomBuild Include="..\..\..\decoder\core\asm\dct.asm">
|
<CustomBuild Include="..\..\..\decoder\core\asm\dct.asm">
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
|
||||||
</CustomBuild>
|
|
||||||
<CustomBuild Include="..\..\..\decoder\core\asm\deblock.asm">
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
|
||||||
</CustomBuild>
|
|
||||||
<CustomBuild Include="..\..\..\decoder\core\asm\expand_picture.asm">
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
<CustomBuild Include="..\..\..\decoder\core\asm\intra_pred.asm">
|
<CustomBuild Include="..\..\..\decoder\core\asm\intra_pred.asm">
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
|
||||||
</CustomBuild>
|
|
||||||
<CustomBuild Include="..\..\..\decoder\core\asm\mb_copy.asm">
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
|
||||||
</CustomBuild>
|
|
||||||
<CustomBuild Include="..\..\..\decoder\core\asm\mc_chroma.asm">
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
|
||||||
</CustomBuild>
|
|
||||||
<CustomBuild Include="..\..\..\decoder\core\asm\mc_luma.asm">
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
|
||||||
</CustomBuild>
|
|
||||||
<CustomBuild Include="..\..\..\decoder\core\asm\memzero.asm">
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
|
<ClInclude Include="..\..\..\common\logging.h" />
|
||||||
<ClInclude Include="..\..\..\decoder\core\inc\as264_common.h" />
|
<ClInclude Include="..\..\..\decoder\core\inc\as264_common.h" />
|
||||||
<ClInclude Include="..\..\..\decoder\core\inc\au_parser.h" />
|
<ClInclude Include="..\..\..\decoder\core\inc\au_parser.h" />
|
||||||
<ClInclude Include="..\..\..\decoder\core\inc\bit_stream.h" />
|
<ClInclude Include="..\..\..\decoder\core\inc\bit_stream.h" />
|
||||||
@@ -419,6 +292,7 @@
|
|||||||
<ClInclude Include="..\..\..\decoder\core\inc\wels_const.h" />
|
<ClInclude Include="..\..\..\decoder\core\inc\wels_const.h" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
|
<ClCompile Include="..\..\..\common\logging.cpp" />
|
||||||
<ClCompile Include="..\..\..\decoder\core\src\au_parser.cpp" />
|
<ClCompile Include="..\..\..\decoder\core\src\au_parser.cpp" />
|
||||||
<ClCompile Include="..\..\..\decoder\core\src\bit_stream.cpp" />
|
<ClCompile Include="..\..\..\decoder\core\src\bit_stream.cpp" />
|
||||||
<ClCompile Include="..\..\..\decoder\core\src\cpu.cpp" />
|
<ClCompile Include="..\..\..\decoder\core\src\cpu.cpp" />
|
||||||
@@ -441,6 +315,68 @@
|
|||||||
<ClCompile Include="..\..\..\decoder\core\src\decoder_core.cpp" />
|
<ClCompile Include="..\..\..\decoder\core\src\decoder_core.cpp" />
|
||||||
<ClCompile Include="..\..\..\decoder\core\src\utils.cpp" />
|
<ClCompile Include="..\..\..\decoder\core\src\utils.cpp" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<CustomBuild Include="..\..\..\common\cpuid.asm">
|
||||||
|
<FileType>Document</FileType>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
</CustomBuild>
|
||||||
|
<CustomBuild Include="..\..\..\common\deblock.asm">
|
||||||
|
<FileType>Document</FileType>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
</CustomBuild>
|
||||||
|
<CustomBuild Include="..\..\..\common\expand_picture.asm">
|
||||||
|
<FileType>Document</FileType>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
</CustomBuild>
|
||||||
|
<CustomBuild Include="..\..\..\common\mb_copy.asm">
|
||||||
|
<FileType>Document</FileType>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
</CustomBuild>
|
||||||
|
<CustomBuild Include="..\..\..\common\mc_chroma.asm">
|
||||||
|
<FileType>Document</FileType>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
</CustomBuild>
|
||||||
|
<CustomBuild Include="..\..\..\common\mc_luma.asm">
|
||||||
|
<FileType>Document</FileType>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
</CustomBuild>
|
||||||
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets">
|
<ImportGroup Label="ExtensionTargets">
|
||||||
<Import Project="$(VCTargetsPath)\BuildCustomizations\masm.targets" />
|
<Import Project="$(VCTargetsPath)\BuildCustomizations\masm.targets" />
|
||||||
|
|||||||
@@ -64,6 +64,9 @@
|
|||||||
<ClCompile Include="..\..\..\decoder\core\src\utils.cpp">
|
<ClCompile Include="..\..\..\decoder\core\src\utils.cpp">
|
||||||
<Filter>sources</Filter>
|
<Filter>sources</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
|
<ClCompile Include="..\..\..\common\logging.cpp">
|
||||||
|
<Filter>sources</Filter>
|
||||||
|
</ClCompile>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ClInclude Include="..\..\..\decoder\core\inc\as264_common.h">
|
<ClInclude Include="..\..\..\decoder\core\inc\as264_common.h">
|
||||||
@@ -183,39 +186,36 @@
|
|||||||
<ClInclude Include="..\..\..\decoder\core\inc\wels_common_basis.h">
|
<ClInclude Include="..\..\..\decoder\core\inc\wels_common_basis.h">
|
||||||
<Filter>headers</Filter>
|
<Filter>headers</Filter>
|
||||||
</ClInclude>
|
</ClInclude>
|
||||||
|
<ClInclude Include="..\..\..\common\logging.h">
|
||||||
|
<Filter>headers</Filter>
|
||||||
|
</ClInclude>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<CustomBuild Include="..\..\..\decoder\core\asm\asm_inc.asm">
|
|
||||||
<Filter>ASM</Filter>
|
|
||||||
</CustomBuild>
|
|
||||||
<CustomBuild Include="..\..\..\decoder\core\asm\block_add.asm">
|
<CustomBuild Include="..\..\..\decoder\core\asm\block_add.asm">
|
||||||
<Filter>ASM</Filter>
|
<Filter>ASM</Filter>
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
<CustomBuild Include="..\..\..\decoder\core\asm\cpuid.asm">
|
|
||||||
<Filter>ASM</Filter>
|
|
||||||
</CustomBuild>
|
|
||||||
<CustomBuild Include="..\..\..\decoder\core\asm\dct.asm">
|
<CustomBuild Include="..\..\..\decoder\core\asm\dct.asm">
|
||||||
<Filter>ASM</Filter>
|
<Filter>ASM</Filter>
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
<CustomBuild Include="..\..\..\decoder\core\asm\deblock.asm">
|
|
||||||
<Filter>ASM</Filter>
|
|
||||||
</CustomBuild>
|
|
||||||
<CustomBuild Include="..\..\..\decoder\core\asm\expand_picture.asm">
|
|
||||||
<Filter>ASM</Filter>
|
|
||||||
</CustomBuild>
|
|
||||||
<CustomBuild Include="..\..\..\decoder\core\asm\intra_pred.asm">
|
<CustomBuild Include="..\..\..\decoder\core\asm\intra_pred.asm">
|
||||||
<Filter>ASM</Filter>
|
<Filter>ASM</Filter>
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
<CustomBuild Include="..\..\..\decoder\core\asm\mb_copy.asm">
|
<CustomBuild Include="..\..\..\common\mc_luma.asm">
|
||||||
<Filter>ASM</Filter>
|
<Filter>ASM</Filter>
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
<CustomBuild Include="..\..\..\decoder\core\asm\mc_chroma.asm">
|
<CustomBuild Include="..\..\..\common\mc_chroma.asm">
|
||||||
<Filter>ASM</Filter>
|
<Filter>ASM</Filter>
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
<CustomBuild Include="..\..\..\decoder\core\asm\mc_luma.asm">
|
<CustomBuild Include="..\..\..\common\mb_copy.asm">
|
||||||
<Filter>ASM</Filter>
|
<Filter>ASM</Filter>
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
<CustomBuild Include="..\..\..\decoder\core\asm\memzero.asm">
|
<CustomBuild Include="..\..\..\common\expand_picture.asm">
|
||||||
|
<Filter>ASM</Filter>
|
||||||
|
</CustomBuild>
|
||||||
|
<CustomBuild Include="..\..\..\common\deblock.asm">
|
||||||
|
<Filter>ASM</Filter>
|
||||||
|
</CustomBuild>
|
||||||
|
<CustomBuild Include="..\..\..\common\cpuid.asm">
|
||||||
<Filter>ASM</Filter>
|
<Filter>ASM</Filter>
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|||||||
@@ -107,7 +107,7 @@
|
|||||||
<ClCompile>
|
<ClCompile>
|
||||||
<Optimization>MaxSpeed</Optimization>
|
<Optimization>MaxSpeed</Optimization>
|
||||||
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
|
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
|
||||||
<AdditionalIncludeDirectories>..\..\..\decoder\plus\inc;..\..\..\decoder\core\inc;..\..\..\api\svc;..\..\..\common;..\..\..\hwDecoder\plus\inc;..\..\..\hwDecoder\core\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
<AdditionalIncludeDirectories>..\..\..\common;..\..\..\decoder\plus\inc;..\..\..\decoder\core\inc;..\..\..\api\svc;..\..\..\common;..\..\..\hwDecoder\plus\inc;..\..\..\hwDecoder\core\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||||
<PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_USRDLL;WELSDECPLUS_EXPORTS;HAVE_CACHE_LINE_ALIGN;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
<PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_USRDLL;WELSDECPLUS_EXPORTS;HAVE_CACHE_LINE_ALIGN;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
<StringPooling>true</StringPooling>
|
<StringPooling>true</StringPooling>
|
||||||
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
|
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
|
||||||
@@ -156,7 +156,7 @@
|
|||||||
<ClCompile>
|
<ClCompile>
|
||||||
<Optimization>MaxSpeed</Optimization>
|
<Optimization>MaxSpeed</Optimization>
|
||||||
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
|
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
|
||||||
<AdditionalIncludeDirectories>..\..\..\decoder\plus\inc;..\..\..\decoder\core\inc;..\..\..\api\svc;..\..\..\common;..\..\..\hwDecoder\plus\inc;..\..\..\hwDecoder\core\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
<AdditionalIncludeDirectories>..\..\..\common;..\..\..\decoder\plus\inc;..\..\..\decoder\core\inc;..\..\..\api\svc;..\..\..\common;..\..\..\hwDecoder\plus\inc;..\..\..\hwDecoder\core\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||||
<PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_USRDLL;WELSDECPLUS_EXPORTS;HAVE_CACHE_LINE_ALIGN;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
<PreprocessorDefinitions>WIN32;NDEBUG;_WINDOWS;_USRDLL;WELSDECPLUS_EXPORTS;HAVE_CACHE_LINE_ALIGN;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
<StringPooling>true</StringPooling>
|
<StringPooling>true</StringPooling>
|
||||||
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
|
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
|
||||||
@@ -204,7 +204,7 @@
|
|||||||
</Midl>
|
</Midl>
|
||||||
<ClCompile>
|
<ClCompile>
|
||||||
<Optimization>Disabled</Optimization>
|
<Optimization>Disabled</Optimization>
|
||||||
<AdditionalIncludeDirectories>..\..\..\decoder\plus\inc;..\..\..\decoder\core\inc;..\..\..\api\svc;..\..\..\common;..\..\..\hwDecoder\plus\inc;..\..\..\hwDecoder\core\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
<AdditionalIncludeDirectories>..\..\..\common;..\..\..\decoder\plus\inc;..\..\..\decoder\core\inc;..\..\..\api\svc;..\..\..\common;..\..\..\hwDecoder\plus\inc;..\..\..\hwDecoder\core\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||||
<PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_USRDLL;WELSDECPLUS_EXPORTS;HAVE_CACHE_LINE_ALIGN;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
<PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_USRDLL;WELSDECPLUS_EXPORTS;HAVE_CACHE_LINE_ALIGN;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
<MinimalRebuild>true</MinimalRebuild>
|
<MinimalRebuild>true</MinimalRebuild>
|
||||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||||
|
|||||||
@@ -102,7 +102,7 @@
|
|||||||
<ClCompile>
|
<ClCompile>
|
||||||
<Optimization>MaxSpeed</Optimization>
|
<Optimization>MaxSpeed</Optimization>
|
||||||
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
|
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
|
||||||
<AdditionalIncludeDirectories>..\..\..\console\dec\inc;..\..\..\api\svc;..\..\..\common;..\..\..\encoder\core\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
<AdditionalIncludeDirectories>..\..\..\common;..\..\..\console\dec\inc;..\..\..\api\svc;..\..\..\common;..\..\..\encoder\core\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
<StringPooling>true</StringPooling>
|
<StringPooling>true</StringPooling>
|
||||||
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
|
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
|
||||||
@@ -144,7 +144,7 @@
|
|||||||
<ClCompile>
|
<ClCompile>
|
||||||
<Optimization>MaxSpeed</Optimization>
|
<Optimization>MaxSpeed</Optimization>
|
||||||
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
|
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
|
||||||
<AdditionalIncludeDirectories>..\..\..\console\dec\inc;..\..\..\api\svc;..\..\..\common;..\..\..\encoder\core\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
<AdditionalIncludeDirectories>..\..\..\common;..\..\..\console\dec\inc;..\..\..\api\svc;..\..\..\common;..\..\..\encoder\core\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
<StringPooling>true</StringPooling>
|
<StringPooling>true</StringPooling>
|
||||||
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
|
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
|
||||||
@@ -227,7 +227,7 @@
|
|||||||
</Midl>
|
</Midl>
|
||||||
<ClCompile>
|
<ClCompile>
|
||||||
<Optimization>Disabled</Optimization>
|
<Optimization>Disabled</Optimization>
|
||||||
<AdditionalIncludeDirectories>..\..\..\console\dec\inc;..\..\..\api\svc;..\..\..\common;..\..\..\encoder\core\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
<AdditionalIncludeDirectories>..\..\..\common;..\..\..\console\dec\inc;..\..\..\api\svc;..\..\..\common;..\..\..\encoder\core\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||||
<PreprocessorDefinitions>WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
<PreprocessorDefinitions>WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||||
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
|
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -127,7 +127,7 @@
|
|||||||
<ClCompile>
|
<ClCompile>
|
||||||
<Optimization>Disabled</Optimization>
|
<Optimization>Disabled</Optimization>
|
||||||
<AdditionalIncludeDirectories>..\..\..\encoder\core\inc;..\..\..\api\svc;..\..\..\WelsThreadLib\api;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
<AdditionalIncludeDirectories>..\..\..\encoder\core\inc;..\..\..\api\svc;..\..\..\WelsThreadLib\api;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||||
<PreprocessorDefinitions>WIN64;_DEBUG;_LIB;WELS_SVC;ENCODER_CORE;HAVE_CACHE_LINE_ALIGN;MT_ENABLED;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
<PreprocessorDefinitions>WIN64;_DEBUG;X86_ASM;_LIB;WELS_SVC;ENCODER_CORE;HAVE_CACHE_LINE_ALIGN;MT_ENABLED;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||||
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
|
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
|
||||||
<PrecompiledHeaderOutputFile>.\..\..\..\obj\encoder\core\Debug/WelsEncCore.pch</PrecompiledHeaderOutputFile>
|
<PrecompiledHeaderOutputFile>.\..\..\..\obj\encoder\core\Debug/WelsEncCore.pch</PrecompiledHeaderOutputFile>
|
||||||
@@ -197,7 +197,7 @@
|
|||||||
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
|
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
|
||||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||||
<AdditionalIncludeDirectories>..\..\..\encoder\core\inc;..\..\..\api\svc;..\..\..\WelsThreadLib\api;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
<AdditionalIncludeDirectories>..\..\..\encoder\core\inc;..\..\..\api\svc;..\..\..\WelsThreadLib\api;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||||
<PreprocessorDefinitions>WIN64;NDEBUG;_LIB;WELS_SVC;ENCODER_CORE;HAVE_CACHE_LINE_ALIGN;MT_ENABLED;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
<PreprocessorDefinitions>WIN64;NDEBUG;X86_ASM;_LIB;WELS_SVC;ENCODER_CORE;HAVE_CACHE_LINE_ALIGN;MT_ENABLED;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
<StringPooling>true</StringPooling>
|
<StringPooling>true</StringPooling>
|
||||||
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
|
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
|
||||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||||
@@ -565,255 +565,154 @@
|
|||||||
<ClInclude Include="..\..\..\encoder\core\inc\wels_preprocess.h" />
|
<ClInclude Include="..\..\..\encoder\core\inc\wels_preprocess.h" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<CustomBuild Include="..\..\..\encoder\core\asm\asm_inc.asm">
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
|
||||||
</CustomBuild>
|
|
||||||
<CustomBuild Include="..\..\..\encoder\core\asm\coeff.asm">
|
<CustomBuild Include="..\..\..\encoder\core\asm\coeff.asm">
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
|
||||||
</CustomBuild>
|
|
||||||
<CustomBuild Include="..\..\..\encoder\core\asm\cpuid.asm">
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
<CustomBuild Include="..\..\..\encoder\core\asm\dct.asm">
|
<CustomBuild Include="..\..\..\encoder\core\asm\dct.asm">
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
|
||||||
</CustomBuild>
|
|
||||||
<CustomBuild Include="..\..\..\encoder\core\asm\deblock.asm">
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
|
||||||
</CustomBuild>
|
|
||||||
<CustomBuild Include="..\..\..\encoder\core\asm\expand_picture.asm">
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
<CustomBuild Include="..\..\..\encoder\core\asm\intra_pred.asm">
|
<CustomBuild Include="..\..\..\encoder\core\asm\intra_pred.asm">
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
|
||||||
</CustomBuild>
|
|
||||||
<CustomBuild Include="..\..\..\encoder\core\asm\intra_pred_util.asm">
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
|
||||||
</CustomBuild>
|
|
||||||
<CustomBuild Include="..\..\..\encoder\core\asm\mb_copy.asm">
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
|
||||||
</CustomBuild>
|
|
||||||
<CustomBuild Include="..\..\..\encoder\core\asm\mc_chroma.asm">
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
|
||||||
</CustomBuild>
|
|
||||||
<CustomBuild Include="..\..\..\encoder\core\asm\mc_luma.asm">
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
<CustomBuild Include="..\..\..\encoder\core\asm\memzero.asm">
|
<CustomBuild Include="..\..\..\encoder\core\asm\memzero.asm">
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
<CustomBuild Include="..\..\..\encoder\core\asm\quant.asm">
|
<CustomBuild Include="..\..\..\encoder\core\asm\quant.asm">
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
<CustomBuild Include="..\..\..\encoder\core\asm\satd_sad.asm">
|
<CustomBuild Include="..\..\..\encoder\core\asm\satd_sad.asm">
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
<CustomBuild Include="..\..\..\encoder\core\asm\score.asm">
|
<CustomBuild Include="..\..\..\encoder\core\asm\score.asm">
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
<CustomBuild Include="..\..\..\encoder\core\asm\vaa.asm">
|
</ItemGroup>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
<ItemGroup>
|
||||||
</Command>
|
<CustomBuild Include="..\..\..\common\cpuid.asm">
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
<FileType>Document</FileType>
|
||||||
</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -O3 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
</CustomBuild>
|
||||||
|
<CustomBuild Include="..\..\..\common\deblock.asm">
|
||||||
|
<FileType>Document</FileType>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
</CustomBuild>
|
||||||
|
<CustomBuild Include="..\..\..\common\expand_picture.asm">
|
||||||
|
<FileType>Document</FileType>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
</CustomBuild>
|
||||||
|
<CustomBuild Include="..\..\..\common\mb_copy.asm">
|
||||||
|
<FileType>Document</FileType>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
</CustomBuild>
|
||||||
|
<CustomBuild Include="..\..\..\common\mc_chroma.asm">
|
||||||
|
<FileType>Document</FileType>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
</CustomBuild>
|
||||||
|
<CustomBuild Include="..\..\..\common\mc_luma.asm">
|
||||||
|
<FileType>Document</FileType>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
</CustomBuild>
|
||||||
|
<CustomBuild Include="..\..\..\common\vaa.asm">
|
||||||
|
<FileType>Document</FileType>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
|
|||||||
@@ -278,39 +278,15 @@
|
|||||||
</ClInclude>
|
</ClInclude>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<CustomBuild Include="..\..\..\encoder\core\asm\asm_inc.asm">
|
|
||||||
<Filter>ASM</Filter>
|
|
||||||
</CustomBuild>
|
|
||||||
<CustomBuild Include="..\..\..\encoder\core\asm\coeff.asm">
|
<CustomBuild Include="..\..\..\encoder\core\asm\coeff.asm">
|
||||||
<Filter>ASM</Filter>
|
<Filter>ASM</Filter>
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
<CustomBuild Include="..\..\..\encoder\core\asm\cpuid.asm">
|
|
||||||
<Filter>ASM</Filter>
|
|
||||||
</CustomBuild>
|
|
||||||
<CustomBuild Include="..\..\..\encoder\core\asm\dct.asm">
|
<CustomBuild Include="..\..\..\encoder\core\asm\dct.asm">
|
||||||
<Filter>ASM</Filter>
|
<Filter>ASM</Filter>
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
<CustomBuild Include="..\..\..\encoder\core\asm\deblock.asm">
|
|
||||||
<Filter>ASM</Filter>
|
|
||||||
</CustomBuild>
|
|
||||||
<CustomBuild Include="..\..\..\encoder\core\asm\expand_picture.asm">
|
|
||||||
<Filter>ASM</Filter>
|
|
||||||
</CustomBuild>
|
|
||||||
<CustomBuild Include="..\..\..\encoder\core\asm\intra_pred.asm">
|
<CustomBuild Include="..\..\..\encoder\core\asm\intra_pred.asm">
|
||||||
<Filter>ASM</Filter>
|
<Filter>ASM</Filter>
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
<CustomBuild Include="..\..\..\encoder\core\asm\intra_pred_util.asm">
|
|
||||||
<Filter>ASM</Filter>
|
|
||||||
</CustomBuild>
|
|
||||||
<CustomBuild Include="..\..\..\encoder\core\asm\mb_copy.asm">
|
|
||||||
<Filter>ASM</Filter>
|
|
||||||
</CustomBuild>
|
|
||||||
<CustomBuild Include="..\..\..\encoder\core\asm\mc_chroma.asm">
|
|
||||||
<Filter>ASM</Filter>
|
|
||||||
</CustomBuild>
|
|
||||||
<CustomBuild Include="..\..\..\encoder\core\asm\mc_luma.asm">
|
|
||||||
<Filter>ASM</Filter>
|
|
||||||
</CustomBuild>
|
|
||||||
<CustomBuild Include="..\..\..\encoder\core\asm\memzero.asm">
|
<CustomBuild Include="..\..\..\encoder\core\asm\memzero.asm">
|
||||||
<Filter>ASM</Filter>
|
<Filter>ASM</Filter>
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
@@ -323,7 +299,25 @@
|
|||||||
<CustomBuild Include="..\..\..\encoder\core\asm\score.asm">
|
<CustomBuild Include="..\..\..\encoder\core\asm\score.asm">
|
||||||
<Filter>ASM</Filter>
|
<Filter>ASM</Filter>
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
<CustomBuild Include="..\..\..\encoder\core\asm\vaa.asm">
|
<CustomBuild Include="..\..\..\common\mc_luma.asm">
|
||||||
|
<Filter>ASM</Filter>
|
||||||
|
</CustomBuild>
|
||||||
|
<CustomBuild Include="..\..\..\common\mc_chroma.asm">
|
||||||
|
<Filter>ASM</Filter>
|
||||||
|
</CustomBuild>
|
||||||
|
<CustomBuild Include="..\..\..\common\mb_copy.asm">
|
||||||
|
<Filter>ASM</Filter>
|
||||||
|
</CustomBuild>
|
||||||
|
<CustomBuild Include="..\..\..\common\expand_picture.asm">
|
||||||
|
<Filter>ASM</Filter>
|
||||||
|
</CustomBuild>
|
||||||
|
<CustomBuild Include="..\..\..\common\deblock.asm">
|
||||||
|
<Filter>ASM</Filter>
|
||||||
|
</CustomBuild>
|
||||||
|
<CustomBuild Include="..\..\..\common\cpuid.asm">
|
||||||
|
<Filter>ASM</Filter>
|
||||||
|
</CustomBuild>
|
||||||
|
<CustomBuild Include="..\..\..\common\vaa.asm">
|
||||||
<Filter>ASM</Filter>
|
<Filter>ASM</Filter>
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "encConsole", "encConsole.vc
|
|||||||
{E8DFAFA1-8DAC-4127-8D27-FBD5819EE562} = {E8DFAFA1-8DAC-4127-8D27-FBD5819EE562}
|
{E8DFAFA1-8DAC-4127-8D27-FBD5819EE562} = {E8DFAFA1-8DAC-4127-8D27-FBD5819EE562}
|
||||||
EndProjectSection
|
EndProjectSection
|
||||||
EndProject
|
EndProject
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "WelsVP", "..\..\..\..\processing\build\win32\WelsVP_2008.vcproj", "{E8DFAFA1-8DAC-4127-8D27-FBD5819EE562}"
|
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "WelsVP", "..\..\..\processing\build\win32\WelsVP_2008.vcproj", "{E8DFAFA1-8DAC-4127-8D27-FBD5819EE562}"
|
||||||
EndProject
|
EndProject
|
||||||
Global
|
Global
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "WelsEncPlus_2010", "WelsEnc
|
|||||||
EndProject
|
EndProject
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "encConsole_2010", "encConsole_2010.vcxproj", "{8509E2A8-2CBD-49E2-B564-3EFF1E927459}"
|
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "encConsole_2010", "encConsole_2010.vcxproj", "{8509E2A8-2CBD-49E2-B564-3EFF1E927459}"
|
||||||
EndProject
|
EndProject
|
||||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "WelsVP_2010", "..\..\..\..\processing\build\win32\WelsVP_2010.vcxproj", "{E8DFAFA1-8DAC-4127-8D27-FBD5819EE562}"
|
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "WelsVP_2010", "..\..\..\processing\build\win32\WelsVP_2010.vcxproj", "{E8DFAFA1-8DAC-4127-8D27-FBD5819EE562}"
|
||||||
EndProject
|
EndProject
|
||||||
Global
|
Global
|
||||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||||
|
|||||||
@@ -55,12 +55,286 @@
|
|||||||
%define WELSEMMS
|
%define WELSEMMS
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
BITS 32
|
|
||||||
|
|
||||||
;***********************************************************************
|
;***********************************************************************
|
||||||
; Macros
|
; Macros
|
||||||
;***********************************************************************
|
;***********************************************************************
|
||||||
|
|
||||||
|
DEFAULT REL
|
||||||
|
|
||||||
|
;-----------------------------------------------------------------------
; Per-ABI argument / register aliases.
;
; One of WIN64, UNIX64 or X86_32 is expected to be defined on the
; assembler command line.  Each branch selects the BITS mode and maps:
;   arg1..arg10            where the N-th integer/pointer argument lives
;   r0..r6                 working registers (r7 is the stack pointer)
;   rNd / rNw / rNb        32- / 16- / 8-bit views of the same registers
;   PUSHRFLAGS/POPRFLAGS   flags push/pop of the native width
;   retrq / retrd          integer return register
;
; Stack-based argN aliases reference push_num, so the including file must
; %assign push_num (number of pushes done since function entry) before
; using them; the alias is expanded at the point of use.
;-----------------------------------------------------------------------
%ifdef WIN64 ; Windows x64 ;************************************

BITS 64

; Args 1-4 are in registers; 5+ are on the stack above the return
; address (8 bytes) and the 32-byte shadow space, hence the +40 base.
%define arg1  rcx
%define arg2  rdx
%define arg3  r8
%define arg4  r9
%define arg5  [rsp + push_num*8 + 40]
%define arg6  [rsp + push_num*8 + 48]
%define arg7  [rsp + push_num*8 + 56]
%define arg8  [rsp + push_num*8 + 64]
%define arg9  [rsp + push_num*8 + 72]
%define arg10 [rsp + push_num*8 + 80]

; r0..r3 coincide with arg1..arg4.
%define r0 rcx
%define r1 rdx
%define r2 r8
%define r3 r9
%define r4 rax
%define r5 r10
%define r6 r11
%define r7 rsp

%define r0d ecx
%define r1d edx
%define r2d r8d
%define r3d r9d
%define r4d eax
%define r5d r10d
%define r6d r11d

%define r0w cx
%define r1w dx
%define r2w r8w
%define r3w r9w

; NASM spells the low byte of r8..r15 as r8b..r15b (not r8l..r15l).
%define r0b cl
%define r1b dl
%define r2b r8b
%define r3b r9b

%define PUSHRFLAGS pushfq
%define POPRFLAGS  popfq
%define retrq rax
%define retrd eax

%elifdef UNIX64 ; Unix x64 ;************************************

BITS 64

; Args 1-6 are in registers; 7+ are on the stack directly above the
; return address, hence the +8 base.
%define arg1  rdi
%define arg2  rsi
%define arg3  rdx
%define arg4  rcx
%define arg5  r8
%define arg6  r9
%define arg7  [rsp + push_num*8 + 8]
%define arg8  [rsp + push_num*8 + 16]
%define arg9  [rsp + push_num*8 + 24]
%define arg10 [rsp + push_num*8 + 32]

; r0..r5 coincide with arg1..arg6.
%define r0 rdi
%define r1 rsi
%define r2 rdx
%define r3 rcx
%define r4 r8
%define r5 r9
%define r6 r10
%define r7 rsp

%define r0d edi
%define r1d esi
%define r2d edx
%define r3d ecx
%define r4d r8d
%define r5d r9d
%define r6d r10d

%define r0w di
%define r1w si
%define r2w dx
%define r3w cx

%define r0b dil
%define r1b sil
%define r2b dl
%define r3b cl

%define PUSHRFLAGS pushfq
%define POPRFLAGS  popfq
%define retrq rax
%define retrd eax

%elifdef X86_32 ; X86_32 ;************************************

BITS 32

; All arguments are on the stack, 4 bytes each, above the return address.
%define arg1  [esp + push_num*4 + 4]
%define arg2  [esp + push_num*4 + 8]
%define arg3  [esp + push_num*4 + 12]
%define arg4  [esp + push_num*4 + 16]
%define arg5  [esp + push_num*4 + 20]
%define arg6  [esp + push_num*4 + 24]
%define arg7  [esp + push_num*4 + 28]
%define arg8  [esp + push_num*4 + 32]
%define arg9  [esp + push_num*4 + 36]
%define arg10 [esp + push_num*4 + 40]

%define r0 eax
%define r1 ecx
%define r2 edx
%define r3 ebx
%define r4 esi
%define r5 edi
%define r6 ebp
%define r7 esp

; In 32-bit mode the "d" views are the registers themselves.
%define r0d eax
%define r1d ecx
%define r2d edx
%define r3d ebx
%define r4d esi
%define r5d edi
%define r6d ebp

%define r0w ax
%define r1w cx
%define r2w dx
%define r3w bx

%define r0b al
%define r1b cl
%define r2b dl
%define r3b bl

%define PUSHRFLAGS pushfd
%define POPRFLAGS  popfd
%define retrq eax ; 32-bit mode does not support 64-bit registers
%define retrd eax

%endif
|
||||||
|
|
||||||
|
; LOAD_PARA dst, src
; Expands to a single `mov dst, src`.  Both operands are passed through
; verbatim, so any operand pair that `mov` accepts may be used.
%macro LOAD_PARA 2
|
||||||
|
mov %1, %2 ; %1 = destination, %2 = source
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
; LOAD_1_PARA
; Makes argument 1 available in r0.
; Only X86_32 emits code (a stack load through the arg1 alias); on WIN64
; and UNIX64 r0 is the same register as arg1, so nothing is emitted.
; Requires push_num to describe the current stack depth.
%macro LOAD_1_PARA 0
%ifdef X86_32
        mov     r0, arg1
%endif
%endmacro
|
||||||
|
|
||||||
|
; LOAD_2_PARA
; Makes arguments 1-2 available in r0-r1.
; Only X86_32 emits code; on the 64-bit ABIs r0/r1 already alias
; arg1/arg2.  Requires push_num to describe the current stack depth.
%macro LOAD_2_PARA 0
%ifdef X86_32
        mov     r0, arg1
        mov     r1, arg2
%endif
%endmacro
|
||||||
|
|
||||||
|
; LOAD_3_PARA
; Makes arguments 1-3 available in r0-r2.
; Only X86_32 emits code; on the 64-bit ABIs r0-r2 already alias
; arg1-arg3.  Requires push_num to describe the current stack depth.
%macro LOAD_3_PARA 0
%ifdef X86_32
        mov     r0, arg1
        mov     r1, arg2
        mov     r2, arg3
%endif
%endmacro
|
||||||
|
|
||||||
|
; LOAD_4_PARA
; Makes arguments 1-4 available in r0-r3.
; X86_32: r3 (ebx) is pushed first and push_num is bumped by one, so the
;         argN aliases below already account for that push.  Pair with
;         LOAD_4_PARA_POP before returning.
; WIN64 / UNIX64: nothing is emitted; r0-r3 already alias arg1-arg4.
%macro LOAD_4_PARA 0
%ifdef X86_32
        push    r3
        %assign push_num push_num+1
        mov     r0, arg1
        mov     r1, arg2
        mov     r2, arg3
        mov     r3, arg4
%endif
%endmacro
|
||||||
|
|
||||||
|
; LOAD_5_PARA
; Makes arguments 1-5 available in r0-r4.
; X86_32: pushes r3 and r4, bumps push_num by two, then loads all five
;         arguments from the stack.  Pair with LOAD_5_PARA_POP.
; WIN64:  only the fifth argument is on the stack; it is loaded into r4.
; UNIX64: nothing is emitted; r0-r4 already alias arg1-arg5.
%macro LOAD_5_PARA 0
%ifdef X86_32
        push    r3
        push    r4
        %assign push_num push_num+2
        mov     r0, arg1
        mov     r1, arg2
        mov     r2, arg3
        mov     r3, arg4
        mov     r4, arg5
%elifdef WIN64
        mov     r4, arg5
%endif
%endmacro
|
||||||
|
|
||||||
|
; LOAD_6_PARA
; Makes arguments 1-6 available in r0-r5.
; X86_32: pushes r3, r4, r5, bumps push_num by three, then loads all six
;         arguments from the stack.  Pair with LOAD_6_PARA_POP.
; WIN64:  arguments 5 and 6 are loaded from the stack into r4 and r5.
; UNIX64: nothing is emitted; r0-r5 already alias arg1-arg6.
%macro LOAD_6_PARA 0
%ifdef X86_32
        push    r3
        push    r4
        push    r5
        %assign push_num push_num+3
        mov     r0, arg1
        mov     r1, arg2
        mov     r2, arg3
        mov     r3, arg4
        mov     r4, arg5
        mov     r5, arg6
%elifdef WIN64
        mov     r4, arg5
        mov     r5, arg6
%endif
%endmacro
|
||||||
|
|
||||||
|
; LOAD_7_PARA
; Makes arguments 1-7 available in r0-r6.
; X86_32: pushes r3-r6, bumps push_num by four, then loads all seven
;         arguments from the stack.  Pair with LOAD_7_PARA_POP.
; WIN64:  arguments 5-7 are loaded from the stack into r4-r6.
; UNIX64: only the seventh argument is on the stack; it is loaded into r6.
%macro LOAD_7_PARA 0
%ifdef X86_32
        push    r3
        push    r4
        push    r5
        push    r6
        %assign push_num push_num+4
        mov     r0, arg1
        mov     r1, arg2
        mov     r2, arg3
        mov     r3, arg4
        mov     r4, arg5
        mov     r5, arg6
        mov     r6, arg7
%elifdef WIN64
        mov     r4, arg5
        mov     r5, arg6
        mov     r6, arg7
%elifdef UNIX64
        mov     r6, arg7
%endif
%endmacro
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
; LOAD_4_PARA_POP
; Undoes the push done by LOAD_4_PARA: on X86_32 it restores r3; on the
; other targets it emits nothing.  push_num is not adjusted here.
%macro LOAD_4_PARA_POP 0
|
||||||
|
%ifdef X86_32
|
||||||
|
pop r3
|
||||||
|
%endif
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
; LOAD_5_PARA_POP
; Undoes the pushes done by LOAD_5_PARA: on X86_32 it restores r4 and r3
; (reverse push order); on the other targets it emits nothing.
; push_num is not adjusted here.
%macro LOAD_5_PARA_POP 0
|
||||||
|
%ifdef X86_32
|
||||||
|
pop r4
|
||||||
|
pop r3
|
||||||
|
%endif
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
; LOAD_6_PARA_POP
; Undoes the pushes done by LOAD_6_PARA: on X86_32 it restores r5, r4
; and r3 (reverse push order); on the other targets it emits nothing.
; push_num is not adjusted here.
%macro LOAD_6_PARA_POP 0
|
||||||
|
%ifdef X86_32
|
||||||
|
pop r5
|
||||||
|
pop r4
|
||||||
|
pop r3
|
||||||
|
%endif
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
; LOAD_7_PARA_POP
; Undoes the pushes done by LOAD_7_PARA: on X86_32 it restores r6, r5,
; r4 and r3 (reverse push order); on the other targets it emits nothing.
; push_num is not adjusted here.
%macro LOAD_7_PARA_POP 0
|
||||||
|
%ifdef X86_32
|
||||||
|
pop r6
|
||||||
|
pop r5
|
||||||
|
pop r4
|
||||||
|
pop r3
|
||||||
|
%endif
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
; SIGN_EXTENTION dst, src
; On every target except X86_32, sign-extends %2 into %1 with movsx.
; When X86_32 is defined the macro expands to nothing.
; NOTE(review): for a 32-bit source and 64-bit destination the canonical
; mnemonic is movsxd -- confirm the assembler in use accepts this form.
%macro SIGN_EXTENTION 2
|
||||||
|
%ifndef X86_32
|
||||||
|
movsx %1, %2
|
||||||
|
%endif
|
||||||
|
%endmacro
|
||||||
|
|
||||||
%macro WELS_EXTERN 1
|
%macro WELS_EXTERN 1
|
||||||
%ifdef PREFIX
|
%ifdef PREFIX
|
||||||
global _%1
|
global _%1
|
||||||
@@ -39,20 +39,12 @@
|
|||||||
;*
|
;*
|
||||||
;*************************************************************************/
|
;*************************************************************************/
|
||||||
|
|
||||||
bits 32
|
%include "asm_inc.asm"
|
||||||
|
|
||||||
;******************************************************************************************
|
;******************************************************************************************
|
||||||
; Macros
|
; Macros
|
||||||
;******************************************************************************************
|
;******************************************************************************************
|
||||||
|
|
||||||
%macro WELS_EXTERN 1
|
|
||||||
%ifdef PREFIX
|
|
||||||
global _%1
|
|
||||||
%define %1 _%1
|
|
||||||
%else
|
|
||||||
global %1
|
|
||||||
%endif
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
;******************************************************************************************
|
;******************************************************************************************
|
||||||
; Code
|
; Code
|
||||||
@@ -69,13 +61,16 @@ ALIGN 16
|
|||||||
; int32_t WelsCPUIdVerify()
|
; int32_t WelsCPUIdVerify()
|
||||||
;******************************************************************************************
|
;******************************************************************************************
|
||||||
WelsCPUIdVerify:
|
WelsCPUIdVerify:
|
||||||
pushfd ; decrease the SP by 4 and load EFLAGS register onto stack, pushfd 32 bit and pushf for 16 bit
|
push r1
|
||||||
pushfd ; need push 2 EFLAGS, one for processing and the another one for storing purpose
|
PUSHRFLAGS
|
||||||
pop ecx ; get EFLAGS to bit manipulation
|
PUSHRFLAGS
|
||||||
mov eax, ecx ; store into ecx followed
|
|
||||||
xor eax, 00200000h ; get ID flag (bit 21) of EFLAGS to directly indicate cpuid support or not
|
pop r1
|
||||||
xor eax, ecx ; get the ID flag bitwise, eax - 0: not support; otherwise: support
|
mov eax, r1d
|
||||||
popfd ; store back EFLAGS and keep unchanged for system
|
xor eax, 00200000h
|
||||||
|
xor eax, r1d
|
||||||
|
POPRFLAGS
|
||||||
|
pop r1
|
||||||
ret
|
ret
|
||||||
|
|
||||||
WELS_EXTERN WelsCPUId
|
WELS_EXTERN WelsCPUId
|
||||||
@@ -83,6 +78,44 @@ ALIGN 16
|
|||||||
;****************************************************************************************************
|
;****************************************************************************************************
|
||||||
; void WelsCPUId( int32_t uiIndex, int32_t *pFeatureA, int32_t *pFeatureB, int32_t *pFeatureC, int32_t *pFeatureD )
|
; void WelsCPUId( int32_t uiIndex, int32_t *pFeatureA, int32_t *pFeatureB, int32_t *pFeatureC, int32_t *pFeatureD )
|
||||||
;****************************************************************************************************
|
;****************************************************************************************************
|
||||||
|
%ifdef WIN64
|
||||||
|
|
||||||
|
; WelsCPUId, WIN64 variant.
; In:  ecx = uiIndex, rdx = pFeatureA, r8 = pFeatureB, r9 = pFeatureC,
;      fifth argument (pFeatureD) on the stack.
; Out: *pFeatureA = eax, *pFeatureB = ebx, *pFeatureC = ecx,
;      *pFeatureD = edx as returned by cpuid.
; NOTE(review): ecx is not reinitialised before cpuid, so it still holds
; uiIndex as the sub-leaf -- confirm no queried leaf depends on ecx.
WelsCPUId:
|
||||||
|
push rbx ; cpuid overwrites ebx; restored before ret
|
||||||
|
push rdx ; keep pFeatureA, cpuid overwrites edx
|
||||||
|
|
||||||
|
mov eax, ecx ; eax = uiIndex (cpuid leaf)
|
||||||
|
cpuid
|
||||||
|
mov [r9], ecx ; *pFeatureC = ecx
|
||||||
|
mov [r8], ebx ; *pFeatureB = ebx
|
||||||
|
mov rcx, [rsp + 2*8 + 40] ; rcx = pFeatureD: fifth argument, offset skips the two pushes above
|
||||||
|
mov [rcx], edx ; *pFeatureD = edx
|
||||||
|
pop rdx ; rdx = pFeatureA again
|
||||||
|
mov [rdx], eax ; *pFeatureA = eax
|
||||||
|
|
||||||
|
pop rbx
|
||||||
|
ret
|
||||||
|
|
||||||
|
%elifdef UNIX64
|
||||||
|
; WelsCPUId, UNIX64 variant.
; In:  edi = uiIndex, rsi = pFeatureA, rdx = pFeatureB, rcx = pFeatureC,
;      r8 = pFeatureD.
; Out: *pFeatureA = eax, *pFeatureB = ebx, *pFeatureC = ecx,
;      *pFeatureD = edx as returned by cpuid.
; NOTE(review): ecx is not initialised before cpuid, so the sub-leaf is
; the low half of pFeatureC -- confirm no queried leaf depends on ecx.
WelsCPUId:
|
||||||
|
push rbx ; cpuid overwrites ebx; restored before ret
|
||||||
|
push rcx ; keep pFeatureC, cpuid overwrites ecx
|
||||||
|
push rdx ; keep pFeatureB, cpuid overwrites edx
|
||||||
|
|
||||||
|
mov eax, edi ; eax = uiIndex (cpuid leaf)
|
||||||
|
cpuid
|
||||||
|
mov [r8], edx ; *pFeatureD = edx (r8 is free to reuse afterwards)
|
||||||
|
pop rdx ; rdx = pFeatureB
|
||||||
|
pop r8 ; r8 = pFeatureC
|
||||||
|
mov [r8], ecx ; *pFeatureC = ecx
|
||||||
|
mov [rdx], ebx ; *pFeatureB = ebx
|
||||||
|
mov [rsi], eax ; *pFeatureA = eax
|
||||||
|
|
||||||
|
pop rbx
|
||||||
|
ret
|
||||||
|
|
||||||
|
%elifdef X86_32
|
||||||
|
|
||||||
WelsCPUId:
|
WelsCPUId:
|
||||||
push ebx
|
push ebx
|
||||||
push edi
|
push edi
|
||||||
@@ -104,6 +137,8 @@ WelsCPUId:
|
|||||||
pop ebx
|
pop ebx
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
%endif
|
||||||
|
|
||||||
WELS_EXTERN WelsCPUSupportAVX
|
WELS_EXTERN WelsCPUSupportAVX
|
||||||
; need call after cpuid=1 and eax, ecx flag got then
|
; need call after cpuid=1 and eax, ecx flag got then
|
||||||
ALIGN 16
|
ALIGN 16
|
||||||
@@ -111,8 +146,16 @@ ALIGN 16
|
|||||||
; int32_t WelsCPUSupportAVX( uint32_t eax, uint32_t ecx )
|
; int32_t WelsCPUSupportAVX( uint32_t eax, uint32_t ecx )
|
||||||
;****************************************************************************************************
|
;****************************************************************************************************
|
||||||
WelsCPUSupportAVX:
|
WelsCPUSupportAVX:
|
||||||
|
%ifdef WIN64
|
||||||
|
mov eax, ecx
|
||||||
|
mov ecx, edx
|
||||||
|
%elifdef UNIX64
|
||||||
|
mov eax, edi
|
||||||
|
mov ecx, esi
|
||||||
|
%else
|
||||||
mov eax, [esp+4]
|
mov eax, [esp+4]
|
||||||
mov ecx, [esp+8]
|
mov ecx, [esp+8]
|
||||||
|
%endif
|
||||||
|
|
||||||
; refer to detection of AVX addressed in INTEL AVX manual document
|
; refer to detection of AVX addressed in INTEL AVX manual document
|
||||||
and ecx, 018000000H
|
and ecx, 018000000H
|
||||||
@@ -130,6 +173,7 @@ avx_not_supported:
|
|||||||
mov eax, 0
|
mov eax, 0
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
|
||||||
WELS_EXTERN WelsCPUSupportFMA
|
WELS_EXTERN WelsCPUSupportFMA
|
||||||
; need call after cpuid=1 and eax, ecx flag got then
|
; need call after cpuid=1 and eax, ecx flag got then
|
||||||
ALIGN 16
|
ALIGN 16
|
||||||
@@ -137,9 +181,16 @@ ALIGN 16
|
|||||||
; int32_t WelsCPUSupportFMA( uint32_t eax, uint32_t ecx )
|
; int32_t WelsCPUSupportFMA( uint32_t eax, uint32_t ecx )
|
||||||
;****************************************************************************************************
|
;****************************************************************************************************
|
||||||
WelsCPUSupportFMA:
|
WelsCPUSupportFMA:
|
||||||
|
%ifdef WIN64
|
||||||
|
mov eax, ecx
|
||||||
|
mov ecx, edx
|
||||||
|
%elifdef UNIX64
|
||||||
|
mov eax, edi
|
||||||
|
mov ecx, esi
|
||||||
|
%else
|
||||||
mov eax, [esp+4]
|
mov eax, [esp+4]
|
||||||
mov ecx, [esp+8]
|
mov ecx, [esp+8]
|
||||||
|
%endif
|
||||||
; refer to detection of FMA addressed in INTEL AVX manual document
|
; refer to detection of FMA addressed in INTEL AVX manual document
|
||||||
and ecx, 018001000H
|
and ecx, 018001000H
|
||||||
cmp ecx, 018001000H ; check OSXSAVE, AVX, FMA feature flags
|
cmp ecx, 018001000H ; check OSXSAVE, AVX, FMA feature flags
|
||||||
5325
codec/common/deblock.asm
Normal file
5325
codec/common/deblock.asm
Normal file
File diff suppressed because it is too large
Load Diff
740
codec/common/expand_picture.asm
Normal file
740
codec/common/expand_picture.asm
Normal file
@@ -0,0 +1,740 @@
|
|||||||
|
;*!
|
||||||
|
;* \copy
|
||||||
|
;* Copyright (c) 2009-2013, Cisco Systems
|
||||||
|
;* All rights reserved.
|
||||||
|
;*
|
||||||
|
;* Redistribution and use in source and binary forms, with or without
|
||||||
|
;* modification, are permitted provided that the following conditions
|
||||||
|
;* are met:
|
||||||
|
;*
|
||||||
|
;* * Redistributions of source code must retain the above copyright
|
||||||
|
;* notice, this list of conditions and the following disclaimer.
|
||||||
|
;*
|
||||||
|
;* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
;* notice, this list of conditions and the following disclaimer in
|
||||||
|
;* the documentation and/or other materials provided with the
|
||||||
|
;* distribution.
|
||||||
|
;*
|
||||||
|
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||||
|
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||||
|
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||||
|
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||||
|
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
||||||
|
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
;* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
;*
|
||||||
|
;*
|
||||||
|
;* expand_picture.asm
|
||||||
|
;*
|
||||||
|
;* Abstract
|
||||||
|
;* mmxext/sse for expand_frame
|
||||||
|
;*
|
||||||
|
;* History
|
||||||
|
;* 09/25/2009 Created
|
||||||
|
;*
|
||||||
|
;*
|
||||||
|
;*************************************************************************/
|
||||||
|
|
||||||
|
%include "asm_inc.asm"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
;***********************************************************************
|
||||||
|
; Macros and other preprocessor constants
|
||||||
|
;***********************************************************************
|
||||||
|
|
||||||
|
;***********************************************************************
|
||||||
|
; Local Data (Read Only)
|
||||||
|
;***********************************************************************
|
||||||
|
|
||||||
|
;SECTION .rodata pData align=16
|
||||||
|
|
||||||
|
;***********************************************************************
|
||||||
|
; Various memory constants (trigonometric values or rounding values)
|
||||||
|
;***********************************************************************
|
||||||
|
;%define PADDING_SIZE_ASM 32 ; PADDING_LENGTH
|
||||||
|
|
||||||
|
;***********************************************************************
|
||||||
|
; Code
|
||||||
|
;***********************************************************************
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
SECTION .text
|
||||||
|
|
||||||
|
WELS_EXTERN ExpandPictureLuma_sse2
|
||||||
|
WELS_EXTERN ExpandPictureChromaAlign_sse2 ; for chroma alignment
|
||||||
|
WELS_EXTERN ExpandPictureChromaUnalign_sse2 ; for chroma unalignment
|
||||||
|
|
||||||
|
;;;;;;;expanding result;;;;;;;
|
||||||
|
|
||||||
|
;aaaa|attttttttttttttttb|bbbb
|
||||||
|
;aaaa|attttttttttttttttb|bbbb
|
||||||
|
;aaaa|attttttttttttttttb|bbbb
|
||||||
|
;aaaa|attttttttttttttttb|bbbb
|
||||||
|
;----------------------------
|
||||||
|
;aaaa|attttttttttttttttb|bbbb
|
||||||
|
;llll|l r|rrrr
|
||||||
|
;llll|l r|rrrr
|
||||||
|
;llll|l r|rrrr
|
||||||
|
;llll|l r|rrrr
|
||||||
|
;llll|l r|rrrr
|
||||||
|
;cccc|ceeeeeeeeeeeeeeeed|dddd
|
||||||
|
;----------------------------
|
||||||
|
;cccc|ceeeeeeeeeeeeeeeed|dddd
|
||||||
|
;cccc|ceeeeeeeeeeeeeeeed|dddd
|
||||||
|
;cccc|ceeeeeeeeeeeeeeeed|dddd
|
||||||
|
;cccc|ceeeeeeeeeeeeeeeed|dddd
|
||||||
|
|
||||||
|
; mov_line_8x4_mmx dst, stride, mmreg
; Stores the 8 bytes held in %3 to four consecutive rows starting at [%1]
; (rows are %2 bytes apart) and leaves %1 advanced by four rows, ready
; for the next group.
%macro mov_line_8x4_mmx 3
%rep 2                                  ; two rows per pass
        movq    [%1], %3
        movq    [%1+%2], %3
        lea     %1, [%1+2*%2]
%endrep
%endmacro
|
||||||
|
|
||||||
|
; mov_line_end8x4_mmx dst, stride, mmreg
; Same four 8-byte row stores as mov_line_8x4_mmx, but meant for the last
; group of a column: %1 is advanced by three rows only, so it is left on
; the last row written instead of one row past it.
%macro mov_line_end8x4_mmx 3
        ; rows 0 and 1
        movq    [%1], %3
        movq    [%1+%2], %3
        lea     %1, [%1+2*%2]

        ; rows 2 and 3, then step a single row
        movq    [%1], %3
        movq    [%1+%2], %3
        lea     %1, [%1+%2]
%endmacro
|
||||||
|
|
||||||
|
; mov_line_16x4_sse2 dst, stride, xmmreg, u/a
; Stores the 16 bytes held in %3 to four consecutive rows starting at
; [%1] (rows are %2 bytes apart) and leaves %1 advanced by four rows.
; %4 is pasted onto "movdq", selecting movdqa (a) or movdqu (u).
%macro mov_line_16x4_sse2 4
%rep 2                                  ; two rows per pass
        movdq%4 [%1], %3
        movdq%4 [%1+%2], %3
        lea     %1, [%1+2*%2]
%endrep
%endmacro
|
||||||
|
|
||||||
|
; mov_line_end16x4_sse2 dst, stride, xmmreg, u/a
; Same four 16-byte row stores as mov_line_16x4_sse2, but meant for the
; last group of a column: %1 is advanced by three rows only, so it is
; left on the last row written.  %4 selects movdqa (a) or movdqu (u).
%macro mov_line_end16x4_sse2 4
        ; rows 0 and 1
        movdq%4 [%1], %3
        movdq%4 [%1+%2], %3
        lea     %1, [%1+2*%2]

        ; rows 2 and 3, then step a single row
        movdq%4 [%1], %3
        movdq%4 [%1+%2], %3
        lea     %1, [%1+%2]
%endmacro
|
||||||
|
|
||||||
|
; mov_line_32x4_sse2 dst, stride, xmmreg
; Fills four consecutive 32-byte rows starting at [%1] with the 16 bytes
; held in %3 (stored twice per row, aligned stores) and leaves %1
; advanced by four rows.  Rows are %2 bytes apart.
%macro mov_line_32x4_sse2 3
%rep 2                                  ; two rows per pass
        movdqa  [%1], %3
        movdqa  [%1+16], %3
        movdqa  [%1+%2], %3
        movdqa  [%1+%2+16], %3
        lea     %1, [%1+2*%2]
%endrep
%endmacro
|
||||||
|
|
||||||
|
; mov_line_end32x4_sse2 dst, stride, xmmreg
; Same four 32-byte row fills as mov_line_32x4_sse2, but meant for the
; last group of a column: %1 is advanced by three rows only, so it is
; left on the last row written.
%macro mov_line_end32x4_sse2 3
        ; rows 0 and 1
        movdqa  [%1], %3
        movdqa  [%1+16], %3
        movdqa  [%1+%2], %3
        movdqa  [%1+%2+16], %3
        lea     %1, [%1+2*%2]

        ; rows 2 and 3, then step a single row
        movdqa  [%1], %3
        movdqa  [%1+16], %3
        movdqa  [%1+%2], %3
        movdqa  [%1+%2+16], %3
        lea     %1, [%1+%2]
%endmacro
|
||||||
|
|
||||||
|
; exp_top_bottom_sse2 iPaddingSize        (32 = luma, 16 = chroma)
; Replicates the first picture row into the top border and the last
; picture row into the bottom border, one 16-byte column at a time.
;
; Register roles on entry:
;   r0 = top source (first picture row)     r5 = top destination
;   r3 = bottom source (last picture row)   r4 = bottom destination
;   r1 = row step used for both destinations
;   r2 = picture width in bytes
; Each column writes %1 rows and leaves r5/r4 on the last row written;
; r1 is negated after every column so the next column walks back the
; other way.  Clobbers r0-r5, xmm0, xmm1; the chroma variant also uses
; r6, mm0 and mm1 for an 8-byte tail column.
%macro exp_top_bottom_sse2 1
%if %1 == 32                            ; ---- luma: 32 border rows ----
        sar     r2, 04h                 ; r2 = number of 16-byte columns
.top_bottom_loops:
        ; top border column
        movdqa  xmm0, [r0]
%rep 7
        mov_line_16x4_sse2 r5, r1, xmm0, a
%endrep
        mov_line_end16x4_sse2 r5, r1, xmm0, a

        ; bottom border column
        movdqa  xmm1, [r3]
%rep 7
        mov_line_16x4_sse2 r4, r1, xmm1, a
%endrep
        mov_line_end16x4_sse2 r4, r1, xmm1, a

        ; next 16-byte column
        lea     r0, [r0+16]
        lea     r5, [r5+16]
        lea     r3, [r3+16]
        lea     r4, [r4+16]
        neg     r1                      ; reverse walking direction

        dec     r2
        jnz     near .top_bottom_loops
%elif %1 == 16                          ; ---- chroma: 16 border rows ----
        mov     r6, r2                  ; keep the width for the tail test
        sar     r2, 04h                 ; r2 = number of 16-byte columns
.top_bottom_loops:
        ; top border column
        movdqa  xmm0, [r0]
%rep 3
        mov_line_16x4_sse2 r5, r1, xmm0, a
%endrep
        mov_line_end16x4_sse2 r5, r1, xmm0, a

        ; bottom border column
        movdqa  xmm1, [r3]
%rep 3
        mov_line_16x4_sse2 r4, r1, xmm1, a
%endrep
        mov_line_end16x4_sse2 r4, r1, xmm1, a

        ; next 16-byte column
        lea     r0, [r0+16]
        lea     r5, [r5+16]
        lea     r3, [r3+16]
        lea     r4, [r4+16]
        neg     r1                      ; reverse walking direction

        dec     r2
        jnz     near .top_bottom_loops

        ; tail: width not a multiple of 16 -> one more 8-byte column
        and     r6, 0fh
        test    r6, r6
        jz      near .to_be_continued

        ; top border, 8-byte column
        movq    mm0, [r0]
%rep 3
        mov_line_8x4_mmx r5, r1, mm0
%endrep
        mov_line_end8x4_mmx r5, r1, mm0

        ; bottom border, 8-byte column
        movq    mm1, [r3]
%rep 3
        mov_line_8x4_mmx r4, r1, mm1
%endrep
        mov_line_end8x4_mmx r4, r1, mm1
        WELSEMMS

.to_be_continued:
%endif
%endmacro
|
||||||
|
|
||||||
|
; exp_left_right_sse2 iPaddingSize, u/a   (32 = luma, 16 = chroma)
; For every picture row, replicates the left-most pixel into the left
; border and the right-most pixel into the right border.
;
; Register roles on entry:
;   r0 = left source (first pixel of row 0)   r5 = left destination
;   r3 = right source (last pixel of row 0)   r4 = right destination
;   r1 = stride, r6 = number of rows
; Luma writes 32 bytes per side with aligned stores.  Chroma writes 16
; bytes per side; its right-hand store uses movdq%2 because that address
; may not be 16-byte aligned.  Clobbers r0, r2-r6, xmm0, xmm1.
; Nothing is emitted for any other padding size.
%macro exp_left_right_sse2 2
%if %1 == 32 || %1 == 16
.left_right_loops:
        ; left border
        movzx   r2d, byte [r0]
        SSE2_Copy16Times xmm0, r2d
        movdqa  [r5], xmm0
%if %1 == 32
        movdqa  [r5+16], xmm0
%endif

        ; right border
        movzx   r2d, byte [r3]
        SSE2_Copy16Times xmm1, r2d
%if %1 == 32
        movdqa  [r4], xmm1
        movdqa  [r4+16], xmm1
%else
        movdq%2 [r4], xmm1
%endif

        ; step all four pointers to the next row
        lea     r0, [r0+r1]
        lea     r5, [r5+r1]
        lea     r3, [r3+r1]
        lea     r4, [r4+r1]

        dec     r6
        jnz     near .left_right_loops
%endif
%endmacro
|
||||||
|
|
||||||
|
; exp_cross_sse2 iPaddingSize, u/a        (32 = luma, 16 = chroma)
; Fills the four corner blocks of the border with a replicated pixel.
;
; Register roles on entry:
;   r3 = top-left dst      xmm3 = top-left pixel x16
;   r4 = top-right dst     xmm4 = top-right pixel x16
;   r5 = bottom-left dst   xmm5 = bottom-left pixel x16
;   r6 = bottom-right dst  xmm6 = bottom-right pixel x16
;   r1 = row step applied to all four destinations
; Luma fills 32x32 blocks with aligned stores.  Chroma fills 16x16
; blocks; the two right-hand corners use movdq%2 because they may not be
; 16-byte aligned.  Each destination register is left on the last row
; written.
%macro exp_cross_sse2 2
%if %1 == 32                            ; ---- luma ----
        ; top-left
%rep 7
        mov_line_32x4_sse2 r3, r1, xmm3
%endrep
        mov_line_end32x4_sse2 r3, r1, xmm3

        ; top-right
%rep 7
        mov_line_32x4_sse2 r4, r1, xmm4
%endrep
        mov_line_end32x4_sse2 r4, r1, xmm4

        ; bottom-left
%rep 7
        mov_line_32x4_sse2 r5, r1, xmm5
%endrep
        mov_line_end32x4_sse2 r5, r1, xmm5

        ; bottom-right
%rep 7
        mov_line_32x4_sse2 r6, r1, xmm6
%endrep
        mov_line_end32x4_sse2 r6, r1, xmm6
%elif %1 == 16                          ; ---- chroma ----
        ; top-left
%rep 3
        mov_line_16x4_sse2 r3, r1, xmm3, a
%endrep
        mov_line_end16x4_sse2 r3, r1, xmm3, a

        ; top-right
%rep 3
        mov_line_16x4_sse2 r4, r1, xmm4, %2
%endrep
        mov_line_end16x4_sse2 r4, r1, xmm4, %2

        ; bottom-left
%rep 3
        mov_line_16x4_sse2 r5, r1, xmm5, a
%endrep
        mov_line_end16x4_sse2 r5, r1, xmm5, a

        ; bottom-right
%rep 3
        mov_line_16x4_sse2 r6, r1, xmm6, %2
%endrep
        mov_line_end16x4_sse2 r6, r1, xmm6, %2
%endif
%endmacro
|
||||||
|
|
||||||
|
ALIGN 16
|
||||||
|
;***********************************************************************----------------
|
||||||
|
; void ExpandPictureLuma_sse2( uint8_t *pDst,
|
||||||
|
; const int32_t iStride,
|
||||||
|
; const int32_t iWidth,
|
||||||
|
; const int32_t iHeight );
|
||||||
|
;***********************************************************************----------------
|
||||||
|
; Pads a luma plane with a 32-pixel border on every side by replicating
; its edge pixels (top/bottom rows, left/right columns, four corners).
;
; In (after LOAD_4_PARA): r0 = pDst (top-left picture pixel),
;                         r1 = iStride, r2 = iWidth, r3 = iHeight
; Uses xmm0, xmm1 and xmm3-xmm6.  xmm6 is callee-saved in the Microsoft
; x64 ABI, so it is saved and restored on the stack when WIN64 is
; defined.  r4-r6 are saved and restored on every target.
ExpandPictureLuma_sse2:

        push    r4
        push    r5
        push    r6

        %assign push_num 3
        LOAD_4_PARA

%ifdef WIN64
        ; All four arguments are in registers on WIN64, so moving rsp here
        ; does not disturb any argument access.
        sub     rsp, 16
        movdqu  [rsp], xmm6
%endif

        SIGN_EXTENTION r1, r1d
        SIGN_EXTENTION r2, r2d
        SIGN_EXTENTION r3, r3d

        ; corner pixel, top-left -> xmm3
        movzx   r6d, byte [r0]
        SSE2_Copy16Times xmm3, r6d      ; xmm3 = pSrc[0] x16

        neg     r1
        lea     r5, [r0+r1]             ; r5 = top border dst, pSrc[-stride]
        neg     r1

        push    r3                      ; keep height for the left/right pass

        dec     r3                      ; h-1
        imul    r3, r1                  ; (h-1)*stride
        lea     r3, [r0+r3]             ; r3 = bottom src, pSrc[(h-1)*stride]

        mov     r6, r1                  ; r6 = stride
        sal     r6, 05h                 ; r6 = 32*stride
        lea     r4, [r3+r6]             ; r4 = bottom border dst (last border row)

        ; corner pixels, bottom-left -> xmm5, bottom-right -> xmm6
        movzx   r6d, byte [r3]
        SSE2_Copy16Times xmm5, r6d

        lea     r6, [r3+r2-1]
        movzx   r6d, byte [r6]
        SSE2_Copy16Times xmm6, r6d

        neg     r1                      ; r1 = -stride

        ; ---- top and bottom borders ----
        push    r0
        push    r1
        push    r2

        exp_top_bottom_sse2 32

        pop     r2
        pop     r1
        pop     r0

        ; ---- left and right borders ----
        lea     r5, [r0-32]             ; left border dst (luma: 32 bytes wide)

        lea     r3, [r0+r2-1]           ; right border src
        lea     r4, [r3+1]              ; right border dst

        ; corner pixel, top-right -> xmm4
        movzx   r6d, byte [r3]
        SSE2_Copy16Times xmm4, r6d

        neg     r1                      ; r1 = stride

        pop     r6                      ; r6 = height (pushed above as r3)

        push    r0
        push    r1
        push    r2
        push    r6

        exp_left_right_sse2 32, a

        pop     r6
        pop     r2
        pop     r1
        pop     r0

        ; ---- four corners (xmm3..xmm6 were prepared above) ----
        neg     r1                      ; r1 = -stride
        lea     r3, [r0-32]
        lea     r3, [r3+r1]             ; r3 = last row of top-left corner

        lea     r4, [r0+r2]             ; pSrc + width
        lea     r4, [r4+r1]             ; r4 = last row of top-right corner

        neg     r1                      ; r1 = stride
        add     r6, 32                  ; height + 32 (luma padding)
        imul    r6, r1

        lea     r5, [r3+r6]             ; r5 = last row of bottom-left corner
        lea     r6, [r4+r6]             ; r6 = last row of bottom-right corner

        neg     r1                      ; r1 = -stride

        exp_cross_sse2 32, a

%ifdef WIN64
        ; Every push since the save has been popped, so rsp points at the
        ; saved xmm6 again.
        movdqu  xmm6, [rsp]
        add     rsp, 16
%endif

        LOAD_4_PARA_POP

        pop     r6
        pop     r5
        pop     r4

        %assign push_num 0

        ret
|
||||||
|
|
||||||
|
ALIGN 16
|
||||||
|
;***********************************************************************----------------
|
||||||
|
; void ExpandPictureChromaAlign_sse2( uint8_t *pDst,
|
||||||
|
; const int32_t iStride,
|
||||||
|
; const int32_t iWidth,
|
||||||
|
; const int32_t iHeight );
|
||||||
|
;***********************************************************************----------------
|
||||||
|
ExpandPictureChromaAlign_sse2:
    ; Extends a chroma plane outwards by a 16-pixel border (the width handed
    ; to the exp_* helper macros below). The helpers are invoked with the
    ; 'a' variant selector.
    ;
    ; Arguments, as listed in the signature banner and fetched by LOAD_4_PARA
    ; (macro from asm_inc.asm, presumably filling r0..r3 in order - confirm):
    ;   r0 = pDst      first pixel of the picture
    ;   r1 = iStride   int32, widened below
    ;   r2 = iWidth    int32, widened below
    ;   r3 = iHeight   int32, widened below
    ; r4, r5, r6 are saved on entry and restored before returning.
    push    r4
    push    r5
    push    r6

    %assign push_num 3
    LOAD_4_PARA

    SIGN_EXTENTION  r1, r1d
    SIGN_EXTENTION  r2, r2d
    SIGN_EXTENTION  r3, r3d

    ; ---- corner seed, top-left: xmm3 <- pDst[0] -------------------------
    movzx   r6d, byte [r0]
    SSE2_Copy16Times xmm3, r6d

    ; r5 = pDst - stride : row directly above the picture (top border dst)
    mov     r5, r0
    sub     r5, r1

    push    r3                          ; keep iHeight for the later stages

    ; r3 = pDst + (height-1)*stride : last picture row (bottom border src)
    dec     r3
    imul    r3, r1
    lea     r3, [r0+r3]

    ; r4 = r3 + 16*stride : bottom border destination
    mov     r6, r1
    shl     r6, 4
    lea     r4, [r3+r6]

    ; ---- corner seeds, bottom-left: xmm5, bottom-right: xmm6 ------------
    movzx   r6d, byte [r3]
    SSE2_Copy16Times xmm5, r6d

    movzx   r6d, byte [r3+r2-1]
    SSE2_Copy16Times xmm6, r6d

    neg     r1                          ; r1 = -stride

    ; ---- top and bottom borders ------------------------------------------
    push    r0
    push    r1
    push    r2

    exp_top_bottom_sse2 16

    pop     r2
    pop     r1                          ; r1 = -stride again
    pop     r0

    ; ---- left and right borders ------------------------------------------
    lea     r5, [r0-16]                 ; left border dst (16 bytes before row start)
    lea     r3, [r0+r2-1]               ; right border src (last pixel of row 0)
    lea     r4, [r3+1]                  ; right border dst

    ; corner seed, top-right: xmm4 <- pDst[width-1]
    movzx   r6d, byte [r3]
    SSE2_Copy16Times xmm4, r6d

    neg     r1                          ; r1 = stride

    pop     r6                          ; r6 = iHeight (pushed earlier)

    push    r0
    push    r1
    push    r2
    push    r6

    exp_left_right_sse2 16, a

    pop     r6
    pop     r2
    pop     r1
    pop     r0

    ; ---- four corners -----------------------------------------------------
    ; xmm3..xmm6 already hold the replicated corner pixels.
    neg     r1                          ; r1 = -stride
    lea     r3, [r0+r1-16]              ; last line of the top-left corner

    lea     r4, [r0+r2]                 ; pDst + width
    lea     r4, [r4+r1]                 ; pDst + width - stride (top-right corner)

    neg     r1                          ; r1 = stride
    add     r6, 16                      ; height + border
    imul    r6, r1                      ; (height + 16) * stride

    lea     r5, [r3+r6]                 ; last line of the bottom-left corner
    lea     r6, [r4+r6]                 ; last line of the bottom-right corner

    neg     r1                          ; r1 = -stride, as exp_cross_sse2 is invoked

    exp_cross_sse2 16, a

    LOAD_4_PARA_POP

    pop     r6
    pop     r5
    pop     r4

    %assign push_num 0

    ret
|
||||||
|
|
||||||
|
ALIGN 16
|
||||||
|
;***********************************************************************----------------
|
||||||
|
; void ExpandPictureChromaUnalign_sse2( uint8_t *pDst,
|
||||||
|
; const int32_t iStride,
|
||||||
|
; const int32_t iWidth,
|
||||||
|
; const int32_t iHeight );
|
||||||
|
;***********************************************************************----------------
|
||||||
|
ExpandPictureChromaUnalign_sse2:
    ; Extends a chroma plane outwards by a 16-pixel border (the width handed
    ; to the exp_* helper macros below). Identical in structure to
    ; ExpandPictureChromaAlign_sse2 except that the left/right and corner
    ; helpers are invoked with the 'u' variant selector instead of 'a'.
    ;
    ; Arguments, as listed in the signature banner and fetched by LOAD_4_PARA
    ; (macro from asm_inc.asm, presumably filling r0..r3 in order - confirm):
    ;   r0 = pDst      first pixel of the picture
    ;   r1 = iStride   int32, widened below
    ;   r2 = iWidth    int32, widened below
    ;   r3 = iHeight   int32, widened below
    ; r4, r5, r6 are saved on entry and restored before returning.
    push    r4
    push    r5
    push    r6

    %assign push_num 3
    LOAD_4_PARA

    SIGN_EXTENTION  r1, r1d
    SIGN_EXTENTION  r2, r2d
    SIGN_EXTENTION  r3, r3d

    ; ---- corner seed, top-left: xmm3 <- pDst[0] -------------------------
    movzx   r6d, byte [r0]
    SSE2_Copy16Times xmm3, r6d

    ; r5 = pDst - stride : row directly above the picture (top border dst)
    mov     r5, r0
    sub     r5, r1

    push    r3                          ; keep iHeight for the later stages

    ; r3 = pDst + (height-1)*stride : last picture row (bottom border src)
    dec     r3
    imul    r3, r1
    lea     r3, [r0+r3]

    ; r4 = r3 + 16*stride : bottom border destination
    mov     r6, r1
    shl     r6, 4
    lea     r4, [r3+r6]

    ; ---- corner seeds, bottom-left: xmm5, bottom-right: xmm6 ------------
    movzx   r6d, byte [r3]
    SSE2_Copy16Times xmm5, r6d

    movzx   r6d, byte [r3+r2-1]
    SSE2_Copy16Times xmm6, r6d

    neg     r1                          ; r1 = -stride

    ; ---- top and bottom borders ------------------------------------------
    push    r0
    push    r1
    push    r2

    exp_top_bottom_sse2 16

    pop     r2
    pop     r1                          ; r1 = -stride again
    pop     r0

    ; ---- left and right borders ------------------------------------------
    lea     r5, [r0-16]                 ; left border dst (16 bytes before row start)
    lea     r3, [r0+r2-1]               ; right border src (last pixel of row 0)
    lea     r4, [r3+1]                  ; right border dst

    ; corner seed, top-right: xmm4 <- pDst[width-1]
    movzx   r6d, byte [r3]
    SSE2_Copy16Times xmm4, r6d

    neg     r1                          ; r1 = stride

    pop     r6                          ; r6 = iHeight (pushed earlier)

    push    r0
    push    r1
    push    r2
    push    r6

    exp_left_right_sse2 16, u

    pop     r6
    pop     r2
    pop     r1
    pop     r0

    ; ---- four corners -----------------------------------------------------
    ; xmm3..xmm6 already hold the replicated corner pixels.
    neg     r1                          ; r1 = -stride
    lea     r3, [r0+r1-16]              ; last line of the top-left corner

    lea     r4, [r0+r2]                 ; pDst + width
    lea     r4, [r4+r1]                 ; pDst + width - stride (top-right corner)

    neg     r1                          ; r1 = stride
    add     r6, 16                      ; height + border
    imul    r6, r1                      ; (height + 16) * stride

    lea     r5, [r3+r6]                 ; last line of the bottom-left corner
    lea     r6, [r4+r6]                 ; last line of the bottom-right corner

    neg     r1                          ; r1 = -stride, as exp_cross_sse2 is invoked

    exp_cross_sse2 16, u

    LOAD_4_PARA_POP

    pop     r6
    pop     r5
    pop     r4

    %assign push_num 0

    ret
|
||||||
|
|
||||||
701
codec/common/mb_copy.asm
Normal file
701
codec/common/mb_copy.asm
Normal file
@@ -0,0 +1,701 @@
|
|||||||
|
;*!
|
||||||
|
;* \copy
|
||||||
|
;* Copyright (c) 2009-2013, Cisco Systems
|
||||||
|
;* All rights reserved.
|
||||||
|
;*
|
||||||
|
;* Redistribution and use in source and binary forms, with or without
|
||||||
|
;* modification, are permitted provided that the following conditions
|
||||||
|
;* are met:
|
||||||
|
;*
|
||||||
|
;* * Redistributions of source code must retain the above copyright
|
||||||
|
;* notice, this list of conditions and the following disclaimer.
|
||||||
|
;*
|
||||||
|
;* * Redistributions in binary form must reproduce the above copyright
|
||||||
|
;* notice, this list of conditions and the following disclaimer in
|
||||||
|
;* the documentation and/or other materials provided with the
|
||||||
|
;* distribution.
|
||||||
|
;*
|
||||||
|
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||||
|
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||||
|
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||||
|
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||||
|
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
||||||
|
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
;* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
;*
|
||||||
|
;*
|
||||||
|
;* mb_copy.asm
|
||||||
|
;*
|
||||||
|
;* Abstract
|
||||||
|
;* mb_copy and mb_copy1
|
||||||
|
;*
|
||||||
|
;* History
|
||||||
|
;* 15/09/2009 Created
|
||||||
|
;* 12/28/2009 Modified with larger throughput
|
||||||
|
;* 12/29/2011 Tuned WelsCopy16x16NotAligned_sse2, added UpdateMbMv_sse2 WelsCopy16x8NotAligned_sse2,
|
||||||
|
;* WelsCopy16x8_mmx, WelsCopy8x16_mmx etc;
|
||||||
|
;*
|
||||||
|
;*
|
||||||
|
;*********************************************************************************************/
|
||||||
|
%include "asm_inc.asm"
|
||||||
|
|
||||||
|
;***********************************************************************
|
||||||
|
; Macros and other preprocessor constants
|
||||||
|
;***********************************************************************
|
||||||
|
|
||||||
|
;***********************************************************************
|
||||||
|
; Code
|
||||||
|
;***********************************************************************
|
||||||
|
|
||||||
|
SECTION .text
|
||||||
|
|
||||||
|
WELS_EXTERN WelsCopy16x16_sse2
|
||||||
|
WELS_EXTERN WelsCopy16x16NotAligned_sse2
|
||||||
|
WELS_EXTERN WelsCopy8x8_mmx
|
||||||
|
WELS_EXTERN WelsCopy16x8NotAligned_sse2 ;
|
||||||
|
WELS_EXTERN WelsCopy8x16_mmx ;
|
||||||
|
WELS_EXTERN UpdateMbMv_sse2 ;
|
||||||
|
|
||||||
|
;***********************************************************************
|
||||||
|
; void WelsCopy16x16_sse2( uint8_t* Dst,
|
||||||
|
; int32_t iStrideD,
|
||||||
|
; uint8_t* Src,
|
||||||
|
; int32_t iStrideS )
|
||||||
|
;***********************************************************************
|
||||||
|
ALIGN 16
|
||||||
|
WelsCopy16x16_sse2:
    ; Copies a 16x16 byte block from Src to Dst using aligned 128-bit moves.
    ;
    ; Arguments (per the signature banner; LOAD_4_PARA from asm_inc.asm is
    ; expected to place them in r0..r3 - confirm against that file):
    ;   r0 = Dst       must be 16-byte aligned on every row (movdqa store)
    ;   r1 = iStrideD  int32
    ;   r2 = Src       must be 16-byte aligned on every row (movdqa load)
    ;   r3 = iStrideS  int32
    ; Scratch: r4 = 3*iStrideD, r5 = 3*iStrideS, xmm0..xmm7.
    push    r4
    push    r5
    %assign push_num 2
    LOAD_4_PARA

    ; The strides are 32-bit ints but are used below as full-width address
    ; components, so widen them on 64-bit builds (same idiom as the other
    ; routines in this file).
%ifndef X86_32
    movsx   r1, r1d
    movsx   r3, r3d
%endif

    lea     r4, [r1+2*r1]               ; r4 = 3 * iStrideD
    lea     r5, [r3+2*r3]               ; r5 = 3 * iStrideS

    ; ---- rows 0..7: load ---------------------------------------------------
    movdqa  xmm0, [r2]
    movdqa  xmm1, [r2+r3]
    movdqa  xmm2, [r2+2*r3]
    movdqa  xmm3, [r2+r5]
    lea     r2, [r2+4*r3]
    movdqa  xmm4, [r2]
    movdqa  xmm5, [r2+r3]
    movdqa  xmm6, [r2+2*r3]
    movdqa  xmm7, [r2+r5]
    lea     r2, [r2+4*r3]

    ; ---- rows 0..7: store --------------------------------------------------
    movdqa  [r0], xmm0
    movdqa  [r0+r1], xmm1
    movdqa  [r0+2*r1], xmm2
    movdqa  [r0+r4], xmm3
    lea     r0, [r0+4*r1]
    movdqa  [r0], xmm4
    movdqa  [r0+r1], xmm5
    movdqa  [r0+2*r1], xmm6
    movdqa  [r0+r4], xmm7
    lea     r0, [r0+4*r1]

    ; ---- rows 8..15: load --------------------------------------------------
    movdqa  xmm0, [r2]
    movdqa  xmm1, [r2+r3]
    movdqa  xmm2, [r2+2*r3]
    movdqa  xmm3, [r2+r5]
    lea     r2, [r2+4*r3]
    movdqa  xmm4, [r2]
    movdqa  xmm5, [r2+r3]
    movdqa  xmm6, [r2+2*r3]
    movdqa  xmm7, [r2+r5]

    ; ---- rows 8..15: store -------------------------------------------------
    movdqa  [r0], xmm0
    movdqa  [r0+r1], xmm1
    movdqa  [r0+2*r1], xmm2
    movdqa  [r0+r4], xmm3
    lea     r0, [r0+4*r1]
    movdqa  [r0], xmm4
    movdqa  [r0+r1], xmm5
    movdqa  [r0+2*r1], xmm6
    movdqa  [r0+r4], xmm7

    LOAD_4_PARA_POP
    pop     r5
    pop     r4
    ret
|
||||||
|
|
||||||
|
;***********************************************************************
|
||||||
|
; void WelsCopy16x16NotAligned_sse2( uint8_t* Dst,
|
||||||
|
; int32_t iStrideD,
|
||||||
|
; uint8_t* Src,
|
||||||
|
; int32_t iStrideS )
|
||||||
|
;***********************************************************************
|
||||||
|
ALIGN 16
|
||||||
|
; dst can be align with 16 bytes, but not sure about pSrc, 12/29/2011
|
||||||
|
WelsCopy16x16NotAligned_sse2:
    ; Copies a 16x16 byte block from a possibly unaligned Src to a 16-byte
    ; aligned Dst (unaligned loads, aligned stores).
    ;
    ; Arguments (per the signature banner; LOAD_4_PARA from asm_inc.asm is
    ; expected to place them in r0..r3 - confirm against that file):
    ;   r0 = Dst       must be 16-byte aligned on every row (movdqa store)
    ;   r1 = iStrideD  int32
    ;   r2 = Src       any alignment (movdqu load)
    ;   r3 = iStrideS  int32
    ; Scratch: r4 = 3*iStrideD, r5 = 3*iStrideS, xmm0..xmm7.
    push    r4
    push    r5
    %assign push_num 2
    LOAD_4_PARA

    ; The strides are 32-bit ints but are used below as full-width address
    ; components, so widen them on 64-bit builds (same idiom as the other
    ; routines in this file).
%ifndef X86_32
    movsx   r1, r1d
    movsx   r3, r3d
%endif

    lea     r4, [r1+2*r1]               ; r4 = 3 * iStrideD
    lea     r5, [r3+2*r3]               ; r5 = 3 * iStrideS

    ; ---- rows 0..7: load ---------------------------------------------------
    movdqu  xmm0, [r2]
    movdqu  xmm1, [r2+r3]
    movdqu  xmm2, [r2+2*r3]
    movdqu  xmm3, [r2+r5]
    lea     r2, [r2+4*r3]
    movdqu  xmm4, [r2]
    movdqu  xmm5, [r2+r3]
    movdqu  xmm6, [r2+2*r3]
    movdqu  xmm7, [r2+r5]
    lea     r2, [r2+4*r3]

    ; ---- rows 0..7: store --------------------------------------------------
    movdqa  [r0], xmm0
    movdqa  [r0+r1], xmm1
    movdqa  [r0+2*r1], xmm2
    movdqa  [r0+r4], xmm3
    lea     r0, [r0+4*r1]
    movdqa  [r0], xmm4
    movdqa  [r0+r1], xmm5
    movdqa  [r0+2*r1], xmm6
    movdqa  [r0+r4], xmm7
    lea     r0, [r0+4*r1]

    ; ---- rows 8..15: load --------------------------------------------------
    movdqu  xmm0, [r2]
    movdqu  xmm1, [r2+r3]
    movdqu  xmm2, [r2+2*r3]
    movdqu  xmm3, [r2+r5]
    lea     r2, [r2+4*r3]
    movdqu  xmm4, [r2]
    movdqu  xmm5, [r2+r3]
    movdqu  xmm6, [r2+2*r3]
    movdqu  xmm7, [r2+r5]

    ; ---- rows 8..15: store -------------------------------------------------
    movdqa  [r0], xmm0
    movdqa  [r0+r1], xmm1
    movdqa  [r0+2*r1], xmm2
    movdqa  [r0+r4], xmm3
    lea     r0, [r0+4*r1]
    movdqa  [r0], xmm4
    movdqa  [r0+r1], xmm5
    movdqa  [r0+2*r1], xmm6
    movdqa  [r0+r4], xmm7

    LOAD_4_PARA_POP
    pop     r5
    pop     r4
    ret
|
||||||
|
|
||||||
|
; , 12/29/2011
|
||||||
|
;***********************************************************************
|
||||||
|
; void WelsCopy16x8NotAligned_sse2(uint8_t* Dst,
|
||||||
|
; int32_t iStrideD,
|
||||||
|
; uint8_t* Src,
|
||||||
|
; int32_t iStrideS )
|
||||||
|
;***********************************************************************
|
||||||
|
ALIGN 16
|
||||||
|
WelsCopy16x8NotAligned_sse2:
    ; Copies a block 16 bytes wide and 8 rows high from a possibly unaligned
    ; Src to a 16-byte aligned Dst (unaligned loads, aligned stores).
    ;
    ; Arguments (per the signature banner; LOAD_4_PARA from asm_inc.asm is
    ; expected to place them in r0..r3 - confirm against that file):
    ;   r0 = Dst       must be 16-byte aligned on every row (movdqa store)
    ;   r1 = iStrideD  int32
    ;   r2 = Src       any alignment (movdqu load)
    ;   r3 = iStrideS  int32
    ; Scratch: r4 = 3*iStrideD, r5 = 3*iStrideS, xmm0..xmm7.
    push    r4
    push    r5
    %assign push_num 2
    LOAD_4_PARA

    ; The strides are 32-bit ints but are used below as full-width address
    ; components, so widen them on 64-bit builds (same idiom as the other
    ; routines in this file).
%ifndef X86_32
    movsx   r1, r1d
    movsx   r3, r3d
%endif

    lea     r4, [r1+2*r1]               ; r4 = 3 * iStrideD
    lea     r5, [r3+2*r3]               ; r5 = 3 * iStrideS

    ; ---- load all 8 rows ---------------------------------------------------
    movdqu  xmm0, [r2]
    movdqu  xmm1, [r2+r3]
    movdqu  xmm2, [r2+2*r3]
    movdqu  xmm3, [r2+r5]
    lea     r2, [r2+4*r3]
    movdqu  xmm4, [r2]
    movdqu  xmm5, [r2+r3]
    movdqu  xmm6, [r2+2*r3]
    movdqu  xmm7, [r2+r5]

    ; ---- store all 8 rows --------------------------------------------------
    movdqa  [r0], xmm0
    movdqa  [r0+r1], xmm1
    movdqa  [r0+2*r1], xmm2
    movdqa  [r0+r4], xmm3
    lea     r0, [r0+4*r1]
    movdqa  [r0], xmm4
    movdqa  [r0+r1], xmm5
    movdqa  [r0+2*r1], xmm6
    movdqa  [r0+r4], xmm7

    LOAD_4_PARA_POP
    pop     r5
    pop     r4
    ret
|
||||||
|
|
||||||
|
|
||||||
|
;***********************************************************************
|
||||||
|
; void WelsCopy8x16_mmx(uint8_t* Dst,
|
||||||
|
; int32_t iStrideD,
|
||||||
|
; uint8_t* Src,
|
||||||
|
; int32_t iStrideS )
|
||||||
|
;***********************************************************************
|
||||||
|
ALIGN 16
|
||||||
|
WelsCopy8x16_mmx:
    ; Copies a block 8 bytes wide and 16 rows high from Src to Dst through
    ; the MMX registers, eight rows at a time.
    ;
    ; Arguments (per the signature banner; LOAD_4_PARA from asm_inc.asm is
    ; expected to place them in r0..r3 - confirm against that file):
    ;   r0 = Dst
    ;   r1 = iStrideD  int32
    ;   r2 = Src
    ;   r3 = iStrideS  int32
    ; Scratch: mm0..mm7 (WELSEMMS is issued before returning).
    %assign push_num 0
    LOAD_4_PARA

    ; The strides are 32-bit ints but are used below as full-width address
    ; components, so widen them on 64-bit builds (same idiom as the other
    ; routines in this file).
%ifndef X86_32
    movsx   r1, r1d
    movsx   r3, r3d
%endif

    ; ---- rows 0..7: load ---------------------------------------------------
    movq    mm0, [r2]
    movq    mm1, [r2+r3]
    lea     r2, [r2+2*r3]
    movq    mm2, [r2]
    movq    mm3, [r2+r3]
    lea     r2, [r2+2*r3]
    movq    mm4, [r2]
    movq    mm5, [r2+r3]
    lea     r2, [r2+2*r3]
    movq    mm6, [r2]
    movq    mm7, [r2+r3]
    lea     r2, [r2+2*r3]

    ; ---- rows 0..7: store --------------------------------------------------
    movq    [r0], mm0
    movq    [r0+r1], mm1
    lea     r0, [r0+2*r1]
    movq    [r0], mm2
    movq    [r0+r1], mm3
    lea     r0, [r0+2*r1]
    movq    [r0], mm4
    movq    [r0+r1], mm5
    lea     r0, [r0+2*r1]
    movq    [r0], mm6
    movq    [r0+r1], mm7
    lea     r0, [r0+2*r1]

    ; ---- rows 8..15: load --------------------------------------------------
    movq    mm0, [r2]
    movq    mm1, [r2+r3]
    lea     r2, [r2+2*r3]
    movq    mm2, [r2]
    movq    mm3, [r2+r3]
    lea     r2, [r2+2*r3]
    movq    mm4, [r2]
    movq    mm5, [r2+r3]
    lea     r2, [r2+2*r3]
    movq    mm6, [r2]
    movq    mm7, [r2+r3]

    ; ---- rows 8..15: store -------------------------------------------------
    movq    [r0], mm0
    movq    [r0+r1], mm1
    lea     r0, [r0+2*r1]
    movq    [r0], mm2
    movq    [r0+r1], mm3
    lea     r0, [r0+2*r1]
    movq    [r0], mm4
    movq    [r0+r1], mm5
    lea     r0, [r0+2*r1]
    movq    [r0], mm6
    movq    [r0+r1], mm7

    WELSEMMS
    LOAD_4_PARA_POP
    ret
|
||||||
|
|
||||||
|
;***********************************************************************
|
||||||
|
; void WelsCopy8x8_mmx( uint8_t* Dst,
|
||||||
|
; int32_t iStrideD,
|
||||||
|
; uint8_t* Src,
|
||||||
|
; int32_t iStrideS )
|
||||||
|
;***********************************************************************
|
||||||
|
ALIGN 16
|
||||||
|
WelsCopy8x8_mmx:
    ; Copies an 8x8 byte block from Src to Dst through the MMX registers,
    ; issuing non-temporal prefetches for upcoming source rows.
    ;
    ; Arguments (per the signature banner; LOAD_4_PARA from asm_inc.asm is
    ; expected to place them in r0..r3 - confirm against that file):
    ;   r0 = Dst
    ;   r1 = iStrideD  int32
    ;   r2 = Src
    ;   r3 = iStrideS  int32
    ; Scratch: r4 = 3*iStrideS, mm0..mm7 (WELSEMMS is issued before returning).
    push    r4
    %assign push_num 1
    LOAD_4_PARA

    ; The strides are 32-bit ints but are used below as full-width address
    ; components, so widen them on 64-bit builds (same idiom as the other
    ; routines in this file). This must happen before r4 is derived from r3.
%ifndef X86_32
    movsx   r1, r1d
    movsx   r3, r3d
%endif

    lea     r4, [r3+2*r3]               ; r4 = 3 * iStrideS

    ; ---- load rows 0..7, prefetching two rows ahead of each pair ----------
    prefetchnta [r2+2*r3]
    prefetchnta [r2+r4]
    movq    mm0, [r2]
    movq    mm1, [r2+r3]
    lea     r2, [r2+2*r3]

    prefetchnta [r2+2*r3]
    prefetchnta [r2+r4]
    movq    mm2, [r2]
    movq    mm3, [r2+r3]
    lea     r2, [r2+2*r3]

    prefetchnta [r2+2*r3]
    prefetchnta [r2+r4]
    movq    mm4, [r2]
    movq    mm5, [r2+r3]
    lea     r2, [r2+2*r3]

    movq    mm6, [r2]
    movq    mm7, [r2+r3]

    ; ---- store rows 0..7 ---------------------------------------------------
    movq    [r0], mm0
    movq    [r0+r1], mm1
    lea     r0, [r0+2*r1]
    movq    [r0], mm2
    movq    [r0+r1], mm3
    lea     r0, [r0+2*r1]
    movq    [r0], mm4
    movq    [r0+r1], mm5
    lea     r0, [r0+2*r1]
    movq    [r0], mm6
    movq    [r0+r1], mm7

    WELSEMMS
    LOAD_4_PARA_POP
    pop     r4
    ret
|
||||||
|
|
||||||
|
; (dunhuang@cisco), 12/21/2011
|
||||||
|
;***********************************************************************
|
||||||
|
; void UpdateMbMv_sse2( SMVUnitXY *pMvBuffer, const SMVUnitXY sMv )
|
||||||
|
;***********************************************************************
|
||||||
|
ALIGN 16
|
||||||
|
UpdateMbMv_sse2:
    ; Fills 64 bytes at pMvBuffer with the 32-bit value sMv repeated
    ; (sixteen dword copies, written as four aligned 16-byte stores).
    ;
    ; Arguments (per the signature banner; LOAD_2_PARA from asm_inc.asm is
    ; expected to place them in r0, r1 - confirm against that file):
    ;   r0 = pMvBuffer   must be 16-byte aligned (movdqa stores)
    ;   r1 = sMv         only the low 32 bits (r1d) are used
    ; Scratch: xmm0, xmm1.
    %assign push_num 0
    LOAD_2_PARA

    movd    xmm0, r1d                   ; low dword <- sMv
    pshufd  xmm1, xmm0, 0               ; broadcast dword 0 to all four lanes

    movdqa  [r0+0x00], xmm1
    movdqa  [r0+0x10], xmm1
    movdqa  [r0+0x20], xmm1
    movdqa  [r0+0x30], xmm1
    ret
|
||||||
|
|
||||||
|
;*******************************************************************************
|
||||||
|
; Macros and other preprocessor constants
|
||||||
|
;*******************************************************************************
|
||||||
|
|
||||||
|
;*******************************************************************************
|
||||||
|
; Local Data (Read Only)
|
||||||
|
;*******************************************************************************
|
||||||
|
|
||||||
|
;SECTION .rodata data align=16
|
||||||
|
|
||||||
|
;*******************************************************************************
|
||||||
|
; Various memory constants (trigonometric values or rounding values)
|
||||||
|
;*******************************************************************************
|
||||||
|
|
||||||
|
ALIGN 16
|
||||||
|
|
||||||
|
;*******************************************************************************
|
||||||
|
; Code
|
||||||
|
;*******************************************************************************
|
||||||
|
|
||||||
|
SECTION .text
|
||||||
|
|
||||||
|
WELS_EXTERN PixelAvgWidthEq4_mmx
|
||||||
|
WELS_EXTERN PixelAvgWidthEq8_mmx
|
||||||
|
WELS_EXTERN PixelAvgWidthEq16_sse2
|
||||||
|
|
||||||
|
WELS_EXTERN McCopyWidthEq4_mmx
|
||||||
|
WELS_EXTERN McCopyWidthEq8_mmx
|
||||||
|
WELS_EXTERN McCopyWidthEq16_sse2
|
||||||
|
|
||||||
|
|
||||||
|
ALIGN 16
|
||||||
|
;*******************************************************************************
|
||||||
|
; void_t PixelAvgWidthEq4_mmx( uint8_t *pDst, int iDstStride,
|
||||||
|
; uint8_t *pSrcA, int iSrcAStride,
|
||||||
|
; uint8_t *pSrcB, int iSrcBStride,
|
||||||
|
; int iHeight );
|
||||||
|
;*******************************************************************************
|
||||||
|
PixelAvgWidthEq4_mmx:
    ; Writes the rounded byte-wise average (pavgb) of two 4-pixel-wide
    ; sources to pDst, one row per loop iteration.
    ;
    ; Arguments (per the signature banner; LOAD_7_PARA from asm_inc.asm is
    ; expected to place them in r0..r6 - confirm against that file):
    ;   r0 = pDst,  r1 = iDstStride
    ;   r2 = pSrcA, r3 = iSrcAStride
    ;   r4 = pSrcB, r5 = iSrcBStride
    ;   r6 = iHeight  (the loop is do-while shaped: runs until r6 reaches 0)
    ; Scratch: mm0, mm1 (WELSEMMS is issued before returning).
    %assign push_num 0
    LOAD_7_PARA

%ifndef X86_32
    movsx   r1, r1d
    movsx   r3, r3d
    movsx   r5, r5d
    movsx   r6, r6d
%endif

ALIGN 4
.height_loop:
    movd    mm0, [r4]                   ; 4 pixels of pSrcB
    movd    mm1, [r2]                   ; 4 pixels of pSrcA; loaded with movd because
                                        ; a memory operand on pavgb would read 8 bytes
    pavgb   mm0, mm1
    movd    [r0], mm0

    dec     r6
    lea     r0, [r0+r1]                 ; lea keeps the flags from dec for the branch
    lea     r2, [r2+r3]
    lea     r4, [r4+r5]
    jne     .height_loop

    WELSEMMS
    LOAD_7_PARA_POP
    ret
|
||||||
|
|
||||||
|
|
||||||
|
ALIGN 16
|
||||||
|
;*******************************************************************************
|
||||||
|
; void_t PixelAvgWidthEq8_mmx( uint8_t *pDst, int iDstStride,
|
||||||
|
; uint8_t *pSrcA, int iSrcAStride,
|
||||||
|
; uint8_t *pSrcB, int iSrcBStride,
|
||||||
|
; int iHeight );
|
||||||
|
;*******************************************************************************
|
||||||
|
PixelAvgWidthEq8_mmx:
    ; Writes the rounded byte-wise average (pavgb) of two 8-pixel-wide
    ; sources to pDst, two rows per loop iteration.
    ;
    ; Arguments (per the signature banner; LOAD_7_PARA from asm_inc.asm is
    ; expected to place them in r0..r6 - confirm against that file):
    ;   r0 = pDst,  r1 = iDstStride
    ;   r2 = pSrcA, r3 = iSrcAStride
    ;   r4 = pSrcB, r5 = iSrcBStride
    ;   r6 = iHeight  (decremented by 2 per pass; loop ends when it hits 0)
    ; Scratch: mm0 (WELSEMMS is issued before returning).
    %assign push_num 0
    LOAD_7_PARA

%ifndef X86_32
    movsx   r1, r1d
    movsx   r3, r3d
    movsx   r5, r5d
    movsx   r6, r6d
%endif

ALIGN 4
.row_pair_loop:
    ; first row of the pair
    movq    mm0, [r2]
    pavgb   mm0, [r4]
    movq    [r0], mm0

    ; second row of the pair
    movq    mm0, [r2+r3]
    pavgb   mm0, [r4+r5]
    movq    [r0+r1], mm0

    ; step all three pointers down two rows
    lea     r2, [r2+2*r3]
    lea     r4, [r4+2*r5]
    lea     r0, [r0+2*r1]

    sub     r6, 2
    jnz     .row_pair_loop

    WELSEMMS
    LOAD_7_PARA_POP
    ret
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
ALIGN 16
|
||||||
|
;*******************************************************************************
|
||||||
|
; void_t PixelAvgWidthEq16_sse2( uint8_t *pDst, int iDstStride,
|
||||||
|
; uint8_t *pSrcA, int iSrcAStride,
|
||||||
|
; uint8_t *pSrcB, int iSrcBStride,
|
||||||
|
; int iHeight );
|
||||||
|
;*******************************************************************************
|
||||||
|
PixelAvgWidthEq16_sse2:
    ; Writes the rounded byte-wise average (pavgb) of two 16-pixel-wide
    ; sources to pDst, four rows per loop iteration. All loads and stores
    ; are unaligned (movdqu).
    ;
    ; Arguments (per the signature banner; LOAD_7_PARA from asm_inc.asm is
    ; expected to place them in r0..r6 - confirm against that file):
    ;   r0 = pDst,  r1 = iDstStride
    ;   r2 = pSrcA, r3 = iSrcAStride
    ;   r4 = pSrcB, r5 = iSrcBStride
    ;   r6 = iHeight  (decremented by 4 per pass; loop ends when it hits 0)
    ; Scratch: xmm0, xmm1.
    %assign push_num 0
    LOAD_7_PARA

%ifndef X86_32
    movsx   r1, r1d
    movsx   r3, r3d
    movsx   r5, r5d
    movsx   r6, r6d
%endif

ALIGN 4
.four_row_loop:
    ; row 0
    movdqu  xmm0, [r2]
    movdqu  xmm1, [r4]
    pavgb   xmm0, xmm1
    movdqu  [r0], xmm0

    ; row 1
    movdqu  xmm0, [r2+r3]
    movdqu  xmm1, [r4+r5]
    pavgb   xmm0, xmm1
    movdqu  [r0+r1], xmm0

    ; row 2
    movdqu  xmm0, [r2+2*r3]
    movdqu  xmm1, [r4+2*r5]
    pavgb   xmm0, xmm1
    movdqu  [r0+2*r1], xmm0

    ; advance two rows; row 3 is then at offset +1 row
    lea     r2, [r2+2*r3]
    lea     r4, [r4+2*r5]
    lea     r0, [r0+2*r1]

    ; row 3
    movdqu  xmm0, [r2+r3]
    movdqu  xmm1, [r4+r5]
    pavgb   xmm0, xmm1
    movdqu  [r0+r1], xmm0

    ; advance two more rows, to the start of the next group of four
    lea     r2, [r2+2*r3]
    lea     r4, [r4+2*r5]
    lea     r0, [r0+2*r1]

    sub     r6, 4
    jne     .four_row_loop

    WELSEMMS
    LOAD_7_PARA_POP
    ret
|
||||||
|
|
||||||
|
ALIGN 16
|
||||||
|
;*******************************************************************************
|
||||||
|
; void_t McCopyWidthEq4_mmx( uint8_t *pSrc, int iSrcStride,
|
||||||
|
; uint8_t *pDst, int iDstStride, int iHeight )
|
||||||
|
;*******************************************************************************
|
||||||
|
McCopyWidthEq4_mmx:
    ; Copies a 4-byte-wide column of iHeight rows from pSrc to pDst, one
    ; dword per row, via a general-purpose register.
    ;
    ; Arguments (per the signature banner; LOAD_5_PARA from asm_inc.asm is
    ; expected to place them in r0..r4 - confirm against that file):
    ;   r0 = pSrc, r1 = iSrcStride
    ;   r2 = pDst, r3 = iDstStride
    ;   r4 = iHeight  (the loop is do-while shaped: runs until r4 reaches 0)
    ; r5 is saved on entry, used as the per-row temporary, and restored.
    push    r5
    %assign push_num 1
    LOAD_5_PARA

%ifndef X86_32
    movsx   r1, r1d
    movsx   r3, r3d
    movsx   r4, r4d
%endif

ALIGN 4
.row_loop:
    mov     r5d, [r0]                   ; fetch 4 source bytes
    mov     [r2], r5d                   ; write them to the destination row

    add     r0, r1
    add     r2, r3
    dec     r4
    jnz     .row_loop

    WELSEMMS
    LOAD_5_PARA_POP
    pop     r5
    ret
|
||||||
|
|
||||||
|
ALIGN 16
|
||||||
|
;*******************************************************************************
|
||||||
|
; void_t McCopyWidthEq8_mmx( uint8_t *pSrc, int iSrcStride,
|
||||||
|
; uint8_t *pDst, int iDstStride, int iHeight )
|
||||||
|
;*******************************************************************************
|
||||||
|
McCopyWidthEq8_mmx:
    ; Copies an 8-byte-wide column of iHeight rows from pSrc to pDst, one
    ; quadword per row, through mm0.
    ;
    ; Arguments (per the signature banner; LOAD_5_PARA from asm_inc.asm is
    ; expected to place them in r0..r4 - confirm against that file):
    ;   r0 = pSrc, r1 = iSrcStride
    ;   r2 = pDst, r3 = iDstStride
    ;   r4 = iHeight  (the loop is do-while shaped: runs until r4 reaches 0)
    ; Scratch: mm0 (WELSEMMS is issued before returning).
    %assign push_num 0
    LOAD_5_PARA

%ifndef X86_32
    movsx   r1, r1d
    movsx   r3, r3d
    movsx   r4, r4d
%endif

ALIGN 4
.row_loop:
    movq    mm0, [r0]                   ; fetch 8 source bytes
    movq    [r2], mm0                   ; write them to the destination row
    add     r0, r1
    add     r2, r3
    dec     r4
    jnz     .row_loop

    WELSEMMS
    LOAD_5_PARA_POP
    ret
|
||||||
|
|
||||||
|
|
||||||
|
ALIGN 16
|
||||||
|
;*******************************************************************************
|
||||||
|
; void_t McCopyWidthEq16_sse2( uint8_t *pSrc, int iSrcStride, uint8_t *pDst, int iDstStride, int iHeight )
|
||||||
|
;*******************************************************************************
|
||||||
|
;read unaligned memory
|
||||||
|
; SSE_READ_UNA  dst_xmm, addr_expr
;   Loads 16 bytes from [addr_expr] into dst_xmm as two 8-byte halves
;   (low half via movq, high half via movhps), so no alignment is required.
%macro SSE_READ_UNA 2
    movq    %1, [%2]
    movhps  %1, [%2+8]
%endmacro

; SSE_WRITE_UNA  addr_expr, src_xmm
;   Stores the 16 bytes of src_xmm to [addr_expr] as two 8-byte halves
;   (low half via movq, high half via movhps), so no alignment is required.
%macro SSE_WRITE_UNA 2
    movq    [%1], %2
    movhps  [%1+8], %2
%endmacro
|
||||||
|
McCopyWidthEq16_sse2:
    ; Copies a 16-byte-wide column of iHeight rows from pSrc to pDst, two
    ; rows per loop iteration, using the SSE_READ_UNA / SSE_WRITE_UNA
    ; split 8+8 byte accessors.
    ;
    ; Arguments (per the signature banner; LOAD_5_PARA from asm_inc.asm is
    ; expected to place them in r0..r4 - confirm against that file):
    ;   r0 = pSrc, r1 = iSrcStride
    ;   r2 = pDst, r3 = iDstStride
    ;   r4 = iHeight  (decremented by 2 per pass; loop ends when it hits 0)
    ; Scratch: xmm0, xmm1.
    %assign push_num 0
    LOAD_5_PARA

%ifndef X86_32
    movsx   r1, r1d
    movsx   r3, r3d
    movsx   r4, r4d
%endif

ALIGN 4
.row_pair_loop:
    SSE_READ_UNA    xmm0, r0            ; source row n
    SSE_READ_UNA    xmm1, r0+r1         ; source row n+1
    SSE_WRITE_UNA   r2, xmm0            ; destination row n
    SSE_WRITE_UNA   r2+r3, xmm1         ; destination row n+1

    sub     r4, 2
    lea     r0, [r0+r1*2]               ; lea keeps the flags from sub for the branch
    lea     r2, [r2+r3*2]
    jnz     .row_pair_loop

    LOAD_5_PARA_POP
    ret
|
||||||
@@ -41,8 +41,6 @@
|
|||||||
;*************************************************************************/
|
;*************************************************************************/
|
||||||
%include "asm_inc.asm"
|
%include "asm_inc.asm"
|
||||||
|
|
||||||
BITS 32
|
|
||||||
|
|
||||||
;***********************************************************************
|
;***********************************************************************
|
||||||
; Local Data (Read Only)
|
; Local Data (Read Only)
|
||||||
;***********************************************************************
|
;***********************************************************************
|
||||||
@@ -78,12 +76,21 @@ ALIGN 16
|
|||||||
;*******************************************************************************
|
;*******************************************************************************
|
||||||
WELS_EXTERN McChromaWidthEq4_mmx
|
WELS_EXTERN McChromaWidthEq4_mmx
|
||||||
McChromaWidthEq4_mmx:
|
McChromaWidthEq4_mmx:
|
||||||
push esi
|
;push esi
|
||||||
push edi
|
;push edi
|
||||||
push ebx
|
;push ebx
|
||||||
|
|
||||||
mov eax, [esp +12 + 20]
|
%assign push_num 0
|
||||||
movd mm3, [eax]
|
LOAD_6_PARA
|
||||||
|
%ifndef X86_32
|
||||||
|
movsx r1, r1d
|
||||||
|
movsx r3, r3d
|
||||||
|
movsx r5, r5d
|
||||||
|
%endif
|
||||||
|
|
||||||
|
;mov eax, [esp +12 + 20]
|
||||||
|
|
||||||
|
movd mm3, [r4]; [eax]
|
||||||
WELS_Zero mm7
|
WELS_Zero mm7
|
||||||
punpcklbw mm3, mm3
|
punpcklbw mm3, mm3
|
||||||
movq mm4, mm3
|
movq mm4, mm3
|
||||||
@@ -98,15 +105,15 @@ McChromaWidthEq4_mmx:
|
|||||||
punpcklbw mm4, mm7
|
punpcklbw mm4, mm7
|
||||||
punpckhbw mm6, mm7
|
punpckhbw mm6, mm7
|
||||||
|
|
||||||
mov esi, [esp +12+ 4]
|
;mov esi, [esp +12+ 4]
|
||||||
mov eax, [esp + 12 + 8]
|
;mov eax, [esp + 12 + 8]
|
||||||
mov edi, [esp + 12 + 12]
|
;mov edi, [esp + 12 + 12]
|
||||||
mov edx, [esp + 12 + 16]
|
;mov edx, [esp + 12 + 16]
|
||||||
mov ecx, [esp + 12 + 24]
|
;mov ecx, [esp + 12 + 24]
|
||||||
|
|
||||||
lea ebx, [esi + eax]
|
lea r4, [r0 + r1] ;lea ebx, [esi + eax]
|
||||||
movd mm0, [esi]
|
movd mm0, [r0]
|
||||||
movd mm1, [esi+1]
|
movd mm1, [r0+1]
|
||||||
punpcklbw mm0, mm7
|
punpcklbw mm0, mm7
|
||||||
punpcklbw mm1, mm7
|
punpcklbw mm1, mm7
|
||||||
.xloop:
|
.xloop:
|
||||||
@@ -115,13 +122,13 @@ McChromaWidthEq4_mmx:
|
|||||||
pmullw mm1, mm5
|
pmullw mm1, mm5
|
||||||
paddw mm0, mm1
|
paddw mm0, mm1
|
||||||
|
|
||||||
movd mm1, [ebx]
|
movd mm1, [r4]
|
||||||
punpcklbw mm1, mm7
|
punpcklbw mm1, mm7
|
||||||
movq mm2, mm1
|
movq mm2, mm1
|
||||||
pmullw mm1, mm4
|
pmullw mm1, mm4
|
||||||
paddw mm0, mm1
|
paddw mm0, mm1
|
||||||
|
|
||||||
movd mm1, [ebx+1]
|
movd mm1, [r4+1]
|
||||||
punpcklbw mm1, mm7
|
punpcklbw mm1, mm7
|
||||||
movq mm7, mm1
|
movq mm7, mm1
|
||||||
pmullw mm1,mm6
|
pmullw mm1,mm6
|
||||||
@@ -133,19 +140,20 @@ McChromaWidthEq4_mmx:
|
|||||||
|
|
||||||
WELS_Zero mm7
|
WELS_Zero mm7
|
||||||
packuswb mm0, mm7
|
packuswb mm0, mm7
|
||||||
movd [edi], mm0
|
movd [r2], mm0
|
||||||
|
|
||||||
movq mm0, mm2
|
movq mm0, mm2
|
||||||
|
|
||||||
lea edi, [edi +edx ]
|
lea r2, [r2 + r3]
|
||||||
lea ebx, [ebx + eax]
|
lea r4, [r4 + r1]
|
||||||
|
|
||||||
dec ecx
|
dec r5
|
||||||
jnz near .xloop
|
jnz near .xloop
|
||||||
WELSEMMS
|
WELSEMMS
|
||||||
pop ebx
|
LOAD_6_PARA_POP
|
||||||
pop edi
|
;pop ebx
|
||||||
pop esi
|
;pop edi
|
||||||
|
;pop esi
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
|
||||||
@@ -160,12 +168,20 @@ ALIGN 16
|
|||||||
;*******************************************************************************
|
;*******************************************************************************
|
||||||
WELS_EXTERN McChromaWidthEq8_sse2
|
WELS_EXTERN McChromaWidthEq8_sse2
|
||||||
McChromaWidthEq8_sse2:
|
McChromaWidthEq8_sse2:
|
||||||
push esi
|
;push esi
|
||||||
push edi
|
;push edi
|
||||||
push ebx
|
;push ebx
|
||||||
|
|
||||||
mov eax, [esp +12 + 20]
|
%assign push_num 0
|
||||||
movd xmm3, [eax]
|
LOAD_6_PARA
|
||||||
|
%ifndef X86_32
|
||||||
|
movsx r1, r1d
|
||||||
|
movsx r3, r3d
|
||||||
|
movsx r5, r5d
|
||||||
|
%endif
|
||||||
|
|
||||||
|
;mov eax, [esp +12 + 20]
|
||||||
|
movd xmm3, [r4]
|
||||||
WELS_Zero xmm7
|
WELS_Zero xmm7
|
||||||
punpcklbw xmm3, xmm3
|
punpcklbw xmm3, xmm3
|
||||||
punpcklwd xmm3, xmm3
|
punpcklwd xmm3, xmm3
|
||||||
@@ -181,15 +197,15 @@ McChromaWidthEq8_sse2:
|
|||||||
punpcklbw xmm4, xmm7
|
punpcklbw xmm4, xmm7
|
||||||
punpckhbw xmm6, xmm7
|
punpckhbw xmm6, xmm7
|
||||||
|
|
||||||
mov esi, [esp +12+ 4]
|
;mov esi, [esp +12+ 4]
|
||||||
mov eax, [esp + 12 + 8]
|
;mov eax, [esp + 12 + 8]
|
||||||
mov edi, [esp + 12 + 12]
|
;mov edi, [esp + 12 + 12]
|
||||||
mov edx, [esp + 12 + 16]
|
;mov edx, [esp + 12 + 16]
|
||||||
mov ecx, [esp + 12 + 24]
|
;mov ecx, [esp + 12 + 24]
|
||||||
|
|
||||||
lea ebx, [esi + eax]
|
lea r4, [r0 + r1] ;lea ebx, [esi + eax]
|
||||||
movq xmm0, [esi]
|
movq xmm0, [r0]
|
||||||
movq xmm1, [esi+1]
|
movq xmm1, [r0+1]
|
||||||
punpcklbw xmm0, xmm7
|
punpcklbw xmm0, xmm7
|
||||||
punpcklbw xmm1, xmm7
|
punpcklbw xmm1, xmm7
|
||||||
.xloop:
|
.xloop:
|
||||||
@@ -198,13 +214,13 @@ McChromaWidthEq8_sse2:
|
|||||||
pmullw xmm1, xmm5
|
pmullw xmm1, xmm5
|
||||||
paddw xmm0, xmm1
|
paddw xmm0, xmm1
|
||||||
|
|
||||||
movq xmm1, [ebx]
|
movq xmm1, [r4]
|
||||||
punpcklbw xmm1, xmm7
|
punpcklbw xmm1, xmm7
|
||||||
movdqa xmm2, xmm1
|
movdqa xmm2, xmm1
|
||||||
pmullw xmm1, xmm4
|
pmullw xmm1, xmm4
|
||||||
paddw xmm0, xmm1
|
paddw xmm0, xmm1
|
||||||
|
|
||||||
movq xmm1, [ebx+1]
|
movq xmm1, [r4+1]
|
||||||
punpcklbw xmm1, xmm7
|
punpcklbw xmm1, xmm7
|
||||||
movdqa xmm7, xmm1
|
movdqa xmm7, xmm1
|
||||||
pmullw xmm1, xmm6
|
pmullw xmm1, xmm6
|
||||||
@@ -216,19 +232,21 @@ McChromaWidthEq8_sse2:
|
|||||||
|
|
||||||
WELS_Zero xmm7
|
WELS_Zero xmm7
|
||||||
packuswb xmm0, xmm7
|
packuswb xmm0, xmm7
|
||||||
movq [edi], xmm0
|
movq [r2], xmm0
|
||||||
|
|
||||||
movdqa xmm0, xmm2
|
movdqa xmm0, xmm2
|
||||||
|
|
||||||
lea edi, [edi +edx ]
|
lea r2, [r2 + r3]
|
||||||
lea ebx, [ebx + eax]
|
lea r4, [r4 + r1]
|
||||||
|
|
||||||
dec ecx
|
dec r5
|
||||||
jnz near .xloop
|
jnz near .xloop
|
||||||
|
|
||||||
pop ebx
|
LOAD_6_PARA_POP
|
||||||
pop edi
|
|
||||||
pop esi
|
;pop ebx
|
||||||
|
;pop edi
|
||||||
|
;pop esi
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
|
||||||
@@ -245,39 +263,46 @@ ALIGN 16
|
|||||||
;***********************************************************************
|
;***********************************************************************
|
||||||
WELS_EXTERN McChromaWidthEq8_ssse3
|
WELS_EXTERN McChromaWidthEq8_ssse3
|
||||||
McChromaWidthEq8_ssse3:
|
McChromaWidthEq8_ssse3:
|
||||||
push ebx
|
;push ebx
|
||||||
push esi
|
;push esi
|
||||||
push edi
|
;push edi
|
||||||
|
%assign push_num 0
|
||||||
|
LOAD_6_PARA
|
||||||
|
%ifndef X86_32
|
||||||
|
movsx r1, r1d
|
||||||
|
movsx r3, r3d
|
||||||
|
movsx r5, r5d
|
||||||
|
%endif
|
||||||
|
|
||||||
mov eax, [esp + 12 + 20]
|
;mov eax, [esp + 12 + 20]
|
||||||
|
|
||||||
pxor xmm7, xmm7
|
pxor xmm7, xmm7
|
||||||
movd xmm5, [eax]
|
movd xmm5, [r4]
|
||||||
punpcklwd xmm5, xmm5
|
punpcklwd xmm5, xmm5
|
||||||
punpckldq xmm5, xmm5
|
punpckldq xmm5, xmm5
|
||||||
movdqa xmm6, xmm5
|
movdqa xmm6, xmm5
|
||||||
punpcklqdq xmm5, xmm5
|
punpcklqdq xmm5, xmm5
|
||||||
punpckhqdq xmm6, xmm6
|
punpckhqdq xmm6, xmm6
|
||||||
|
|
||||||
mov eax, [esp + 12 + 4]
|
;mov eax, [esp + 12 + 4]
|
||||||
mov edx, [esp + 12 + 8]
|
;mov edx, [esp + 12 + 8]
|
||||||
mov esi, [esp + 12 + 12]
|
;mov esi, [esp + 12 + 12]
|
||||||
mov edi, [esp + 12 + 16]
|
;mov edi, [esp + 12 + 16]
|
||||||
mov ecx, [esp + 12 + 24]
|
;mov ecx, [esp + 12 + 24]
|
||||||
|
|
||||||
sub esi, edi
|
sub r2, r3 ;sub esi, edi
|
||||||
sub esi, edi
|
sub r2, r3
|
||||||
movdqa xmm7, [h264_d0x20_sse2]
|
movdqa xmm7, [h264_d0x20_sse2]
|
||||||
|
|
||||||
movdqu xmm0, [eax]
|
movdqu xmm0, [r0]
|
||||||
movdqa xmm1, xmm0
|
movdqa xmm1, xmm0
|
||||||
psrldq xmm1, 1
|
psrldq xmm1, 1
|
||||||
punpcklbw xmm0, xmm1
|
punpcklbw xmm0, xmm1
|
||||||
|
|
||||||
.hloop_chroma:
|
.hloop_chroma:
|
||||||
lea esi, [esi+2*edi]
|
lea r2, [r2+2*r3]
|
||||||
|
|
||||||
movdqu xmm2, [eax+edx]
|
movdqu xmm2, [r0+r1]
|
||||||
movdqa xmm3, xmm2
|
movdqa xmm3, xmm2
|
||||||
psrldq xmm3, 1
|
psrldq xmm3, 1
|
||||||
punpcklbw xmm2, xmm3
|
punpcklbw xmm2, xmm3
|
||||||
@@ -289,10 +314,10 @@ McChromaWidthEq8_ssse3:
|
|||||||
paddw xmm0, xmm7
|
paddw xmm0, xmm7
|
||||||
psrlw xmm0, 6
|
psrlw xmm0, 6
|
||||||
packuswb xmm0, xmm0
|
packuswb xmm0, xmm0
|
||||||
movq [esi],xmm0
|
movq [r2],xmm0
|
||||||
|
|
||||||
lea eax, [eax+2*edx]
|
lea r0, [r0+2*r1]
|
||||||
movdqu xmm2, [eax]
|
movdqu xmm2, [r0]
|
||||||
movdqa xmm3, xmm2
|
movdqa xmm3, xmm2
|
||||||
psrldq xmm3, 1
|
psrldq xmm3, 1
|
||||||
punpcklbw xmm2, xmm3
|
punpcklbw xmm2, xmm3
|
||||||
@@ -304,13 +329,16 @@ McChromaWidthEq8_ssse3:
|
|||||||
paddw xmm4, xmm7
|
paddw xmm4, xmm7
|
||||||
psrlw xmm4, 6
|
psrlw xmm4, 6
|
||||||
packuswb xmm4, xmm4
|
packuswb xmm4, xmm4
|
||||||
movq [esi+edi],xmm4
|
movq [r2+r3],xmm4
|
||||||
|
|
||||||
sub ecx, 2
|
sub r5, 2
|
||||||
jnz .hloop_chroma
|
jnz .hloop_chroma
|
||||||
pop edi
|
|
||||||
pop esi
|
LOAD_6_PARA_POP
|
||||||
pop ebx
|
|
||||||
|
;pop edi
|
||||||
|
;pop esi
|
||||||
|
;pop ebx
|
||||||
|
|
||||||
ret
|
ret
|
||||||
|
|
||||||
1293
codec/common/mc_luma.asm
Normal file
1293
codec/common/mc_luma.asm
Normal file
File diff suppressed because it is too large
Load Diff
@@ -6,6 +6,14 @@ COMMON_CPP_SRCS=\
|
|||||||
COMMON_OBJS += $(COMMON_CPP_SRCS:.cpp=.o)
|
COMMON_OBJS += $(COMMON_CPP_SRCS:.cpp=.o)
|
||||||
ifeq ($(USE_ASM), Yes)
|
ifeq ($(USE_ASM), Yes)
|
||||||
COMMON_ASM_SRCS=\
|
COMMON_ASM_SRCS=\
|
||||||
|
$(COMMON_SRCDIR)/./asm_inc.asm\
|
||||||
|
$(COMMON_SRCDIR)/./cpuid.asm\
|
||||||
|
$(COMMON_SRCDIR)/./deblock.asm\
|
||||||
|
$(COMMON_SRCDIR)/./expand_picture.asm\
|
||||||
|
$(COMMON_SRCDIR)/./mb_copy.asm\
|
||||||
|
$(COMMON_SRCDIR)/./mc_chroma.asm\
|
||||||
|
$(COMMON_SRCDIR)/./mc_luma.asm\
|
||||||
|
$(COMMON_SRCDIR)/./vaa.asm\
|
||||||
|
|
||||||
COMMON_OBJS += $(COMMON_ASM_SRCS:.asm=.o)
|
COMMON_OBJS += $(COMMON_ASM_SRCS:.asm=.o)
|
||||||
endif
|
endif
|
||||||
@@ -14,6 +22,30 @@ OBJS += $(COMMON_OBJS)
|
|||||||
$(COMMON_SRCDIR)/./logging.o: $(COMMON_SRCDIR)/./logging.cpp
|
$(COMMON_SRCDIR)/./logging.o: $(COMMON_SRCDIR)/./logging.cpp
|
||||||
$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(COMMON_CFLAGS) $(COMMON_INCLUDES) -c -o $(COMMON_SRCDIR)/./logging.o $(COMMON_SRCDIR)/./logging.cpp
|
$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(COMMON_CFLAGS) $(COMMON_INCLUDES) -c -o $(COMMON_SRCDIR)/./logging.o $(COMMON_SRCDIR)/./logging.cpp
|
||||||
|
|
||||||
|
$(COMMON_SRCDIR)/./asm_inc.o: $(COMMON_SRCDIR)/./asm_inc.asm
|
||||||
|
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(COMMON_ASMFLAGS) $(COMMON_ASM_INCLUDES) -o $(COMMON_SRCDIR)/./asm_inc.o $(COMMON_SRCDIR)/./asm_inc.asm
|
||||||
|
|
||||||
|
$(COMMON_SRCDIR)/./cpuid.o: $(COMMON_SRCDIR)/./cpuid.asm
|
||||||
|
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(COMMON_ASMFLAGS) $(COMMON_ASM_INCLUDES) -o $(COMMON_SRCDIR)/./cpuid.o $(COMMON_SRCDIR)/./cpuid.asm
|
||||||
|
|
||||||
|
$(COMMON_SRCDIR)/./deblock.o: $(COMMON_SRCDIR)/./deblock.asm
|
||||||
|
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(COMMON_ASMFLAGS) $(COMMON_ASM_INCLUDES) -o $(COMMON_SRCDIR)/./deblock.o $(COMMON_SRCDIR)/./deblock.asm
|
||||||
|
|
||||||
|
$(COMMON_SRCDIR)/./expand_picture.o: $(COMMON_SRCDIR)/./expand_picture.asm
|
||||||
|
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(COMMON_ASMFLAGS) $(COMMON_ASM_INCLUDES) -o $(COMMON_SRCDIR)/./expand_picture.o $(COMMON_SRCDIR)/./expand_picture.asm
|
||||||
|
|
||||||
|
$(COMMON_SRCDIR)/./mb_copy.o: $(COMMON_SRCDIR)/./mb_copy.asm
|
||||||
|
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(COMMON_ASMFLAGS) $(COMMON_ASM_INCLUDES) -o $(COMMON_SRCDIR)/./mb_copy.o $(COMMON_SRCDIR)/./mb_copy.asm
|
||||||
|
|
||||||
|
$(COMMON_SRCDIR)/./mc_chroma.o: $(COMMON_SRCDIR)/./mc_chroma.asm
|
||||||
|
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(COMMON_ASMFLAGS) $(COMMON_ASM_INCLUDES) -o $(COMMON_SRCDIR)/./mc_chroma.o $(COMMON_SRCDIR)/./mc_chroma.asm
|
||||||
|
|
||||||
|
$(COMMON_SRCDIR)/./mc_luma.o: $(COMMON_SRCDIR)/./mc_luma.asm
|
||||||
|
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(COMMON_ASMFLAGS) $(COMMON_ASM_INCLUDES) -o $(COMMON_SRCDIR)/./mc_luma.o $(COMMON_SRCDIR)/./mc_luma.asm
|
||||||
|
|
||||||
|
$(COMMON_SRCDIR)/./vaa.o: $(COMMON_SRCDIR)/./vaa.asm
|
||||||
|
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(COMMON_ASMFLAGS) $(COMMON_ASM_INCLUDES) -o $(COMMON_SRCDIR)/./vaa.o $(COMMON_SRCDIR)/./vaa.asm
|
||||||
|
|
||||||
$(LIBPREFIX)common.$(LIBSUFFIX): $(COMMON_OBJS)
|
$(LIBPREFIX)common.$(LIBSUFFIX): $(COMMON_OBJS)
|
||||||
rm -f $(LIBPREFIX)common.$(LIBSUFFIX)
|
rm -f $(LIBPREFIX)common.$(LIBSUFFIX)
|
||||||
$(AR) cr $@ $(COMMON_OBJS)
|
$(AR) cr $@ $(COMMON_OBJS)
|
||||||
|
|||||||
236
codec/encoder/core/asm/vaa.asm → codec/common/vaa.asm
Normal file → Executable file
236
codec/encoder/core/asm/vaa.asm → codec/common/vaa.asm
Normal file → Executable file
@@ -42,7 +42,7 @@
|
|||||||
;*
|
;*
|
||||||
;*************************************************************************/
|
;*************************************************************************/
|
||||||
%include "asm_inc.asm"
|
%include "asm_inc.asm"
|
||||||
BITS 32
|
|
||||||
|
|
||||||
;***********************************************************************
|
;***********************************************************************
|
||||||
; Macros and other preprocessor constants
|
; Macros and other preprocessor constants
|
||||||
@@ -62,8 +62,8 @@ BITS 32
|
|||||||
|
|
||||||
|
|
||||||
%macro VAA_AVG_BLOCK_SSE2 6 ; dst, t0, t1, t2, t3, t4
|
%macro VAA_AVG_BLOCK_SSE2 6 ; dst, t0, t1, t2, t3, t4
|
||||||
movdqa %1, [esi ] ; line 0
|
movdqa %1, [r0 ] ; line 0
|
||||||
movdqa %2, [esi+ecx] ; line 1
|
movdqa %2, [r0+r1] ; line 1
|
||||||
movdqa %3, %1
|
movdqa %3, %1
|
||||||
punpcklbw %1, xmm7
|
punpcklbw %1, xmm7
|
||||||
punpckhbw %3, xmm7
|
punpckhbw %3, xmm7
|
||||||
@@ -72,8 +72,8 @@ BITS 32
|
|||||||
punpckhbw %2, xmm7
|
punpckhbw %2, xmm7
|
||||||
paddw %1, %4
|
paddw %1, %4
|
||||||
paddw %2, %3
|
paddw %2, %3
|
||||||
movdqa %3, [esi+ebx] ; line 2
|
movdqa %3, [r0+r2] ; line 2
|
||||||
movdqa %4, [esi+edx] ; line 3
|
movdqa %4, [r0+r3] ; line 3
|
||||||
movdqa %5, %3
|
movdqa %5, %3
|
||||||
punpcklbw %3, xmm7
|
punpcklbw %3, xmm7
|
||||||
punpckhbw %5, xmm7
|
punpckhbw %5, xmm7
|
||||||
@@ -105,8 +105,8 @@ BITS 32
|
|||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
%macro VAA_AVG_BLOCK_SSSE3 6 ; dst, t0, t1, t2, t3, t4
|
%macro VAA_AVG_BLOCK_SSSE3 6 ; dst, t0, t1, t2, t3, t4
|
||||||
movdqa %1, [esi ] ; line 0
|
movdqa %1, [r0 ] ; line 0
|
||||||
movdqa %2, [esi+ecx] ; line 1
|
movdqa %2, [r0+r1] ; line 1
|
||||||
movdqa %3, %1
|
movdqa %3, %1
|
||||||
punpcklbw %1, xmm7
|
punpcklbw %1, xmm7
|
||||||
punpckhbw %3, xmm7
|
punpckhbw %3, xmm7
|
||||||
@@ -115,8 +115,8 @@ BITS 32
|
|||||||
punpckhbw %2, xmm7
|
punpckhbw %2, xmm7
|
||||||
paddw %1, %4
|
paddw %1, %4
|
||||||
paddw %2, %3
|
paddw %2, %3
|
||||||
movdqa %3, [esi+ebx] ; line 2
|
movdqa %3, [r0+r2] ; line 2
|
||||||
movdqa %4, [esi+edx] ; line 3
|
movdqa %4, [r0+r3] ; line 3
|
||||||
movdqa %5, %3
|
movdqa %5, %3
|
||||||
punpcklbw %3, xmm7
|
punpcklbw %3, xmm7
|
||||||
punpckhbw %5, xmm7
|
punpckhbw %5, xmm7
|
||||||
@@ -158,48 +158,53 @@ WELS_EXTERN AnalysisVaaInfoIntra_sse2
|
|||||||
;***********************************************************************
|
;***********************************************************************
|
||||||
ALIGN 16
|
ALIGN 16
|
||||||
AnalysisVaaInfoIntra_sse2:
|
AnalysisVaaInfoIntra_sse2:
|
||||||
push ebx
|
|
||||||
push edx
|
|
||||||
push esi
|
|
||||||
push edi
|
|
||||||
push ebp
|
|
||||||
|
|
||||||
mov ebp, esp
|
%assign push_num 0
|
||||||
and ebp, 0fh
|
LOAD_2_PARA
|
||||||
sub esp, ebp
|
SIGN_EXTENTION r1,r1d
|
||||||
sub esp, 32
|
|
||||||
%define PUSH_SIZE 52 ; 20 + 32
|
|
||||||
|
|
||||||
mov esi, [esp+ebp+PUSH_SIZE+4] ; data_y
|
%ifdef X86_32
|
||||||
mov ecx, [esp+ebp+PUSH_SIZE+8] ; iLineSize
|
push r3
|
||||||
|
push r4
|
||||||
|
push r5
|
||||||
|
push r6
|
||||||
|
%assign push_num push_num+4
|
||||||
|
%endif
|
||||||
|
|
||||||
mov ebx, ecx
|
mov r5,r7
|
||||||
sal ebx, $1 ; iLineSize x 2 [ebx]
|
and r5,0fh
|
||||||
mov edx, ebx
|
sub r7,r5
|
||||||
add edx, ecx ; iLineSize x 3 [edx]
|
sub r7,32
|
||||||
mov eax, ebx
|
|
||||||
sal eax, $1 ; iLineSize x 4 [eax]
|
|
||||||
|
mov r2,r1
|
||||||
|
sal r2,$1 ;r2 = 2*iLineSize
|
||||||
|
mov r3,r2
|
||||||
|
add r3,r1 ;r3 = 3*iLineSize
|
||||||
|
|
||||||
|
mov r4,r2
|
||||||
|
sal r4,$1 ;r4 = 4*iLineSize
|
||||||
|
|
||||||
pxor xmm7, xmm7
|
pxor xmm7, xmm7
|
||||||
|
|
||||||
; loops
|
; loops
|
||||||
VAA_AVG_BLOCK_SSE2 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5
|
VAA_AVG_BLOCK_SSE2 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5
|
||||||
movq [esp], xmm0
|
movq [r7], xmm0
|
||||||
|
|
||||||
lea esi, [esi+eax]
|
lea r0, [r0+r4]
|
||||||
VAA_AVG_BLOCK_SSE2 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5
|
VAA_AVG_BLOCK_SSE2 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5
|
||||||
movq [esp+8], xmm0
|
movq [r7+8], xmm0
|
||||||
|
|
||||||
lea esi, [esi+eax]
|
lea r0, [r0+r4]
|
||||||
VAA_AVG_BLOCK_SSE2 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5
|
VAA_AVG_BLOCK_SSE2 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5
|
||||||
movq [esp+16], xmm0
|
movq [r7+16], xmm0
|
||||||
|
|
||||||
lea esi, [esi+eax]
|
lea r0, [r0+r4]
|
||||||
VAA_AVG_BLOCK_SSE2 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5
|
VAA_AVG_BLOCK_SSE2 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5
|
||||||
movq [esp+24], xmm0
|
movq [r7+24], xmm0
|
||||||
|
|
||||||
movdqa xmm0, [esp] ; block 0~7
|
movdqa xmm0, [r7] ; block 0~7
|
||||||
movdqa xmm1, [esp+16] ; block 8~15
|
movdqa xmm1, [r7+16] ; block 8~15
|
||||||
movdqa xmm2, xmm0
|
movdqa xmm2, xmm0
|
||||||
paddw xmm0, xmm1
|
paddw xmm0, xmm1
|
||||||
SUM_WORD_8x2_SSE2 xmm0, xmm3
|
SUM_WORD_8x2_SSE2 xmm0, xmm3
|
||||||
@@ -220,22 +225,26 @@ AnalysisVaaInfoIntra_sse2:
|
|||||||
pshufd xmm2, xmm1, 0B1h
|
pshufd xmm2, xmm1, 0B1h
|
||||||
paddd xmm1, xmm2
|
paddd xmm1, xmm2
|
||||||
|
|
||||||
movd ebx, xmm0
|
|
||||||
and ebx, 0ffffh ; effective low word truncated
|
|
||||||
mov ecx, ebx
|
|
||||||
imul ebx, ecx
|
|
||||||
sar ebx, $4
|
|
||||||
movd eax, xmm1
|
|
||||||
sub eax, ebx
|
|
||||||
|
|
||||||
%undef PUSH_SIZE
|
|
||||||
add esp, 32
|
movd r2d, xmm0
|
||||||
add esp, ebp
|
and r2, 0ffffh ; effective low word truncated
|
||||||
pop ebp
|
mov r3, r2
|
||||||
pop edi
|
imul r2, r3
|
||||||
pop esi
|
sar r2, $4
|
||||||
pop edx
|
movd retrd, xmm1
|
||||||
pop ebx
|
sub retrd, r2d
|
||||||
|
|
||||||
|
add r7,32
|
||||||
|
add r7,r5
|
||||||
|
|
||||||
|
%ifdef X86_32
|
||||||
|
pop r6
|
||||||
|
pop r5
|
||||||
|
pop r4
|
||||||
|
pop r3
|
||||||
|
%endif
|
||||||
|
|
||||||
ret
|
ret
|
||||||
|
|
||||||
WELS_EXTERN AnalysisVaaInfoIntra_ssse3
|
WELS_EXTERN AnalysisVaaInfoIntra_ssse3
|
||||||
@@ -244,48 +253,55 @@ WELS_EXTERN AnalysisVaaInfoIntra_ssse3
|
|||||||
;***********************************************************************
|
;***********************************************************************
|
||||||
ALIGN 16
|
ALIGN 16
|
||||||
AnalysisVaaInfoIntra_ssse3:
|
AnalysisVaaInfoIntra_ssse3:
|
||||||
push ebx
|
|
||||||
push edx
|
|
||||||
push esi
|
|
||||||
push edi
|
|
||||||
push ebp
|
|
||||||
|
|
||||||
mov ebp, esp
|
%assign push_num 0
|
||||||
and ebp, 0fh
|
LOAD_2_PARA
|
||||||
sub esp, ebp
|
SIGN_EXTENTION r1,r1d
|
||||||
sub esp, 32
|
|
||||||
%define PUSH_SIZE 52 ; 20 + 32
|
|
||||||
|
|
||||||
mov esi, [esp+ebp+PUSH_SIZE+4] ; data_y
|
%ifdef X86_32
|
||||||
mov ecx, [esp+ebp+PUSH_SIZE+8] ; iLineSize
|
push r3
|
||||||
|
push r4
|
||||||
|
push r5
|
||||||
|
push r6
|
||||||
|
%assign push_num push_num+4
|
||||||
|
%endif
|
||||||
|
|
||||||
mov ebx, ecx
|
mov r5,r7
|
||||||
sal ebx, $1 ; iLineSize x 2 [ebx]
|
and r5,0fh
|
||||||
mov edx, ebx
|
sub r7,r5
|
||||||
add edx, ecx ; iLineSize x 3 [edx]
|
sub r7,32
|
||||||
mov eax, ebx
|
|
||||||
sal eax, $1 ; iLineSize x 4 [eax]
|
|
||||||
|
mov r2,r1
|
||||||
|
sal r2,$1 ;r2 = 2*iLineSize
|
||||||
|
mov r3,r2
|
||||||
|
add r3,r1 ;r3 = 3*iLineSize
|
||||||
|
|
||||||
|
mov r4,r2
|
||||||
|
sal r4,$1 ;r4 = 4*iLineSize
|
||||||
|
|
||||||
pxor xmm7, xmm7
|
pxor xmm7, xmm7
|
||||||
|
|
||||||
; loops
|
; loops
|
||||||
VAA_AVG_BLOCK_SSSE3 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5
|
VAA_AVG_BLOCK_SSSE3 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5
|
||||||
movq [esp], xmm0
|
movq [r7],xmm0
|
||||||
|
|
||||||
lea esi, [esi+eax]
|
lea r0,[r0+r4]
|
||||||
VAA_AVG_BLOCK_SSSE3 xmm1, xmm2, xmm3, xmm4, xmm5, xmm6
|
VAA_AVG_BLOCK_SSSE3 xmm1, xmm2, xmm3, xmm4, xmm5, xmm6
|
||||||
movq [esp+8], xmm1
|
movq [r7+8],xmm1
|
||||||
|
|
||||||
lea esi, [esi+eax]
|
|
||||||
|
lea r0,[r0+r4]
|
||||||
VAA_AVG_BLOCK_SSSE3 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5
|
VAA_AVG_BLOCK_SSSE3 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5
|
||||||
movq [esp+16], xmm0
|
movq [r7+16],xmm0
|
||||||
|
|
||||||
lea esi, [esi+eax]
|
lea r0,[r0+r4]
|
||||||
VAA_AVG_BLOCK_SSSE3 xmm1, xmm2, xmm3, xmm4, xmm5, xmm6
|
VAA_AVG_BLOCK_SSSE3 xmm1, xmm2, xmm3, xmm4, xmm5, xmm6
|
||||||
movq [esp+24], xmm1
|
movq [r7+24],xmm1
|
||||||
|
|
||||||
movdqa xmm0, [esp] ; block 0~7
|
|
||||||
movdqa xmm1, [esp+16] ; block 8~15
|
movdqa xmm0,[r7]
|
||||||
|
movdqa xmm1,[r7+16]
|
||||||
movdqa xmm2, xmm0
|
movdqa xmm2, xmm0
|
||||||
paddw xmm0, xmm1
|
paddw xmm0, xmm1
|
||||||
SUM_WORD_8x2_SSE2 xmm0, xmm3 ; better performance than that of phaddw sets
|
SUM_WORD_8x2_SSE2 xmm0, xmm3 ; better performance than that of phaddw sets
|
||||||
@@ -306,22 +322,24 @@ AnalysisVaaInfoIntra_ssse3:
|
|||||||
pshufd xmm2, xmm1, 0B1h
|
pshufd xmm2, xmm1, 0B1h
|
||||||
paddd xmm1, xmm2
|
paddd xmm1, xmm2
|
||||||
|
|
||||||
movd ebx, xmm0
|
|
||||||
and ebx, 0ffffh ; effective low word truncated
|
|
||||||
mov ecx, ebx
|
|
||||||
imul ebx, ecx
|
|
||||||
sar ebx, $4
|
|
||||||
movd eax, xmm1
|
|
||||||
sub eax, ebx
|
|
||||||
|
|
||||||
%undef PUSH_SIZE
|
movd r2d, xmm0
|
||||||
add esp, 32
|
and r2, 0ffffh ; effective low word truncated
|
||||||
add esp, ebp
|
mov r3, r2
|
||||||
pop ebp
|
imul r2, r3
|
||||||
pop edi
|
sar r2, $4
|
||||||
pop esi
|
movd retrd, xmm1
|
||||||
pop edx
|
sub retrd, r2d
|
||||||
pop ebx
|
|
||||||
|
add r7,32
|
||||||
|
add r7,r5
|
||||||
|
%ifdef X86_32
|
||||||
|
pop r6
|
||||||
|
pop r5
|
||||||
|
pop r4
|
||||||
|
pop r3
|
||||||
|
%endif
|
||||||
|
|
||||||
ret
|
ret
|
||||||
|
|
||||||
WELS_EXTERN MdInterAnalysisVaaInfo_sse41
|
WELS_EXTERN MdInterAnalysisVaaInfo_sse41
|
||||||
@@ -330,8 +348,9 @@ WELS_EXTERN MdInterAnalysisVaaInfo_sse41
|
|||||||
;***********************************************************************
|
;***********************************************************************
|
||||||
ALIGN 16
|
ALIGN 16
|
||||||
MdInterAnalysisVaaInfo_sse41:
|
MdInterAnalysisVaaInfo_sse41:
|
||||||
mov eax, [esp+4]
|
%assign push_num 0
|
||||||
movdqa xmm0, [eax] ; load 4 sad_8x8
|
LOAD_1_PARA
|
||||||
|
movdqa xmm0,[r0]
|
||||||
pshufd xmm1, xmm0, 01Bh
|
pshufd xmm1, xmm0, 01Bh
|
||||||
paddd xmm1, xmm0
|
paddd xmm1, xmm0
|
||||||
pshufd xmm2, xmm1, 0B1h
|
pshufd xmm2, xmm1, 0B1h
|
||||||
@@ -347,15 +366,16 @@ MdInterAnalysisVaaInfo_sse41:
|
|||||||
paddd xmm4, xmm3
|
paddd xmm4, xmm3
|
||||||
pshufd xmm3, xmm4, 0B1h
|
pshufd xmm3, xmm4, 0B1h
|
||||||
paddd xmm3, xmm4
|
paddd xmm3, xmm4
|
||||||
movd eax, xmm3
|
movd r0d, xmm3
|
||||||
cmp eax, 20 ; INTER_VARIANCE_SAD_THRESHOLD
|
cmp r0d, 20 ; INTER_VARIANCE_SAD_THRESHOLD
|
||||||
|
|
||||||
jb near .threshold_exit
|
jb near .threshold_exit
|
||||||
pshufd xmm0, xmm0, 0B1h
|
pshufd xmm0, xmm0, 01Bh
|
||||||
pcmpgtd xmm0, xmm1 ; iSadBlock > iAverageSad
|
pcmpgtd xmm0, xmm1 ; iSadBlock > iAverageSad
|
||||||
movmskps eax, xmm0
|
movmskps retrd, xmm0
|
||||||
ret
|
ret
|
||||||
.threshold_exit:
|
.threshold_exit:
|
||||||
mov eax, 15
|
mov retrd, 15
|
||||||
ret
|
ret
|
||||||
|
|
||||||
WELS_EXTERN MdInterAnalysisVaaInfo_sse2
|
WELS_EXTERN MdInterAnalysisVaaInfo_sse2
|
||||||
@@ -364,8 +384,9 @@ WELS_EXTERN MdInterAnalysisVaaInfo_sse2
|
|||||||
;***********************************************************************
|
;***********************************************************************
|
||||||
ALIGN 16
|
ALIGN 16
|
||||||
MdInterAnalysisVaaInfo_sse2:
|
MdInterAnalysisVaaInfo_sse2:
|
||||||
mov eax, [esp+4]
|
%assign push_num 0
|
||||||
movdqa xmm0, [eax] ; load 4 sad_8x8
|
LOAD_1_PARA
|
||||||
|
movdqa xmm0, [r0]
|
||||||
pshufd xmm1, xmm0, 01Bh
|
pshufd xmm1, xmm0, 01Bh
|
||||||
paddd xmm1, xmm0
|
paddd xmm1, xmm0
|
||||||
pshufd xmm2, xmm1, 0B1h
|
pshufd xmm2, xmm1, 0B1h
|
||||||
@@ -391,13 +412,14 @@ MdInterAnalysisVaaInfo_sse2:
|
|||||||
paddd xmm4, xmm5
|
paddd xmm4, xmm5
|
||||||
pshufd xmm5, xmm4, 0B1h
|
pshufd xmm5, xmm4, 0B1h
|
||||||
paddd xmm5, xmm4
|
paddd xmm5, xmm4
|
||||||
movd eax, xmm5
|
|
||||||
cmp eax, 20 ; INTER_VARIANCE_SAD_THRESHOLD
|
movd r0d, xmm5
|
||||||
|
cmp r0d, 20 ; INTER_VARIANCE_SAD_THRESHOLD
|
||||||
jb near .threshold_exit
|
jb near .threshold_exit
|
||||||
pshufd xmm0, xmm0, 0B1h
|
pshufd xmm0, xmm0, 01Bh
|
||||||
pcmpgtd xmm0, xmm1 ; iSadBlock > iAverageSad
|
pcmpgtd xmm0, xmm1 ; iSadBlock > iAverageSad
|
||||||
movmskps eax, xmm0
|
movmskps retrd, xmm0
|
||||||
ret
|
ret
|
||||||
.threshold_exit:
|
.threshold_exit:
|
||||||
mov eax, 15
|
mov retrd, 15
|
||||||
ret
|
ret
|
||||||
@@ -42,262 +42,6 @@
|
|||||||
|
|
||||||
%include "asm_inc.asm"
|
%include "asm_inc.asm"
|
||||||
|
|
||||||
BITS 32
|
|
||||||
|
|
||||||
;*******************************************************************************
|
|
||||||
; Macros and other preprocessor constants
|
|
||||||
;*******************************************************************************
|
|
||||||
|
|
||||||
%macro BLOCK_ADD_16_SSE2 4
|
|
||||||
movdqa xmm0, [%2]
|
|
||||||
movdqa xmm1, [%3]
|
|
||||||
movdqa xmm2, [%3+10h]
|
|
||||||
movdqa xmm6, xmm0
|
|
||||||
|
|
||||||
punpcklbw xmm0, xmm7
|
|
||||||
punpckhbw xmm6, xmm7
|
|
||||||
|
|
||||||
paddw xmm0, xmm1
|
|
||||||
paddw xmm6, xmm2
|
|
||||||
|
|
||||||
packuswb xmm0, xmm6
|
|
||||||
movdqa [%1], xmm0
|
|
||||||
|
|
||||||
lea %2, [%2+%4]
|
|
||||||
lea %3, [%3+%4*2]
|
|
||||||
lea %1, [%1+%4]
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
%macro BLOCK_ADD_8_MMXEXT 4
|
|
||||||
movq mm0, [%2]
|
|
||||||
movq mm1, [%3]
|
|
||||||
movq mm2, [%3+08h]
|
|
||||||
movq mm6, mm0
|
|
||||||
|
|
||||||
punpcklbw mm0, mm7
|
|
||||||
punpckhbw mm6, mm7
|
|
||||||
|
|
||||||
paddw mm0, mm1
|
|
||||||
paddw mm6, mm2
|
|
||||||
|
|
||||||
packuswb mm0, mm6
|
|
||||||
movq [%1], mm0
|
|
||||||
|
|
||||||
lea %2, [%2+%4]
|
|
||||||
lea %3, [%3+%4*2]
|
|
||||||
lea %1, [%1+%4]
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
|
|
||||||
%macro BLOCK_ADD_16_STRIDE_SSE2 5
|
|
||||||
movdqa xmm0, [%2]
|
|
||||||
movdqa xmm1, [%3]
|
|
||||||
movdqa xmm2, [%3+10h]
|
|
||||||
movdqa xmm6, xmm0
|
|
||||||
|
|
||||||
punpcklbw xmm0, xmm7
|
|
||||||
punpckhbw xmm6, xmm7
|
|
||||||
|
|
||||||
paddw xmm0, xmm1
|
|
||||||
paddw xmm6, xmm2
|
|
||||||
|
|
||||||
packuswb xmm0, xmm6
|
|
||||||
movdqa [%1], xmm0
|
|
||||||
|
|
||||||
lea %2, [%2+%4]
|
|
||||||
lea %3, [%3+%5*2]
|
|
||||||
lea %1, [%1+%4]
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
|
|
||||||
%macro BLOCK_ADD_8_STRIDE_MMXEXT 5
|
|
||||||
movq mm0, [%2]
|
|
||||||
movq mm1, [%3]
|
|
||||||
movq mm2, [%3+08h]
|
|
||||||
movq mm6, mm0
|
|
||||||
|
|
||||||
punpcklbw mm0, mm7
|
|
||||||
punpckhbw mm6, mm7
|
|
||||||
|
|
||||||
paddw mm0, mm1
|
|
||||||
paddw mm6, mm2
|
|
||||||
|
|
||||||
packuswb mm0, mm6
|
|
||||||
movq [%1], mm0
|
|
||||||
|
|
||||||
lea %2, [%2+%4]
|
|
||||||
lea %3, [%3+%5*2]
|
|
||||||
lea %1, [%1+%4]
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
%macro BLOCK_ADD_8_STRIDE_2_LINES_SSE2 5
|
|
||||||
movdqa xmm1, [%3]
|
|
||||||
movq xmm0, [%2]
|
|
||||||
punpcklbw xmm0, xmm7
|
|
||||||
paddw xmm0, xmm1
|
|
||||||
packuswb xmm0, xmm7
|
|
||||||
movq [%1], xmm0
|
|
||||||
|
|
||||||
movdqa xmm3, [%3+%5*2]
|
|
||||||
movq xmm2, [%2+%4]
|
|
||||||
punpcklbw xmm2, xmm7
|
|
||||||
paddw xmm2, xmm3
|
|
||||||
packuswb xmm2, xmm7
|
|
||||||
movq [%1+%4], xmm2
|
|
||||||
|
|
||||||
lea %1, [%1+%4*2]
|
|
||||||
lea %2, [%2+%4*2]
|
|
||||||
lea %3, [%3+%5*4]
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
%macro CHECK_DATA_16_ZERO_SSE4 3
|
|
||||||
mov eax, 0h
|
|
||||||
movdqa xmm0, [%1]
|
|
||||||
movdqa xmm1, [%1+10h]
|
|
||||||
mov ebx, [ecx]
|
|
||||||
|
|
||||||
por xmm0, xmm1
|
|
||||||
ptest xmm7, xmm0
|
|
||||||
cmovae eax, %3
|
|
||||||
|
|
||||||
add %1, 20h
|
|
||||||
add ecx, 04h
|
|
||||||
mov byte [%2+ebx], al
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
%macro CHECK_RS_4x4_BLOCK_2_ZERO_SSE4 5
|
|
||||||
movdqa xmm0, [%1]
|
|
||||||
movdqa xmm1, [%1+%3]
|
|
||||||
movdqa xmm2, [%1+%3*2]
|
|
||||||
movdqa xmm3, [%1+%4]
|
|
||||||
|
|
||||||
mov eax, 0h
|
|
||||||
mov ebx, 0h
|
|
||||||
movdqa xmm4, xmm0
|
|
||||||
movdqa xmm5, xmm2
|
|
||||||
|
|
||||||
punpcklqdq xmm0, xmm1
|
|
||||||
punpckhqdq xmm4, xmm1
|
|
||||||
punpcklqdq xmm2, xmm3
|
|
||||||
punpckhqdq xmm5, xmm3
|
|
||||||
|
|
||||||
por xmm0, xmm2
|
|
||||||
por xmm4, xmm5
|
|
||||||
|
|
||||||
ptest xmm7, xmm0
|
|
||||||
cmovae eax, %5
|
|
||||||
ptest xmm7, xmm4
|
|
||||||
cmovae ebx, %5
|
|
||||||
|
|
||||||
mov byte [%2], al
|
|
||||||
mov byte [%2+1], bl
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
%macro DATA_COPY_16x2_SSE2 3
|
|
||||||
movdqa xmm0, [%1]
|
|
||||||
movdqa xmm1, [%1+10h]
|
|
||||||
movdqa xmm2, [%1+%3]
|
|
||||||
movdqa xmm3, [%1+%3+10h]
|
|
||||||
|
|
||||||
movdqa [%2], xmm0
|
|
||||||
movdqa [%2+10h], xmm1
|
|
||||||
movdqa [%2+20h], xmm2
|
|
||||||
movdqa [%2+30h], xmm3
|
|
||||||
|
|
||||||
lea %1, [%1+%3*2]
|
|
||||||
lea %2, [%2+40h]
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
|
|
||||||
%macro DATA_COPY_8x4_SSE2 4
|
|
||||||
movdqa xmm0, [%1]
|
|
||||||
movdqa xmm1, [%1+%3]
|
|
||||||
movdqa xmm2, [%1+%3*2]
|
|
||||||
movdqa xmm3, [%1+%4]
|
|
||||||
|
|
||||||
movdqa [%2], xmm0
|
|
||||||
movdqa [%2+10h], xmm1
|
|
||||||
movdqa [%2+20h], xmm2
|
|
||||||
movdqa [%2+30h], xmm3
|
|
||||||
|
|
||||||
lea %1, [%1+%3*4]
|
|
||||||
lea %2, [%2+40h]
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
|
|
||||||
%macro CHECK_DATA_16_ZERO_SSE2 3
|
|
||||||
mov eax, 0h
|
|
||||||
movdqa xmm0, [%1]
|
|
||||||
movdqa xmm1, [%1+10h]
|
|
||||||
mov ebx, [ecx]
|
|
||||||
|
|
||||||
pcmpeqw xmm0, xmm7
|
|
||||||
pcmpeqw xmm1, xmm7
|
|
||||||
packsswb xmm0, xmm1
|
|
||||||
pmovmskb edx, xmm0
|
|
||||||
sub edx, 0ffffh
|
|
||||||
|
|
||||||
cmovb eax, ebp
|
|
||||||
add ecx, 4
|
|
||||||
add %1, 20h
|
|
||||||
mov byte [%2+ebx], al
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
%macro CHECK_RS_4x4_BLOCK_2_ZERO_SSE2 5
|
|
||||||
movdqa xmm0, [%1]
|
|
||||||
movdqa xmm1, [%1 + %3]
|
|
||||||
movdqa xmm2, [%1 + %3*2]
|
|
||||||
movdqa xmm3, [%1 + %4]
|
|
||||||
|
|
||||||
movdqa xmm4, xmm0
|
|
||||||
movdqa xmm5, xmm2
|
|
||||||
|
|
||||||
punpcklqdq xmm0, xmm1
|
|
||||||
punpckhqdq xmm4, xmm1
|
|
||||||
punpcklqdq xmm2, xmm3
|
|
||||||
punpckhqdq xmm5, xmm3
|
|
||||||
|
|
||||||
pcmpeqw xmm0, xmm7
|
|
||||||
pcmpeqw xmm2, xmm7
|
|
||||||
pcmpeqw xmm4, xmm7
|
|
||||||
pcmpeqw xmm5, xmm7
|
|
||||||
|
|
||||||
packsswb xmm0, xmm2
|
|
||||||
packsswb xmm4, xmm5
|
|
||||||
pmovmskb eax, xmm0
|
|
||||||
pmovmskb ebx, xmm4
|
|
||||||
|
|
||||||
sub eax, 0ffffh
|
|
||||||
mov eax, 0
|
|
||||||
cmovb eax, %5
|
|
||||||
sub ebx, 0ffffh
|
|
||||||
mov ebx, 0
|
|
||||||
cmovb ebx, %5
|
|
||||||
mov byte [%2], al
|
|
||||||
mov byte [%2+1], bl
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
;*******************************************************************************
|
|
||||||
; Data
|
|
||||||
;*******************************************************************************
|
|
||||||
|
|
||||||
%ifdef FORMAT_COFF
|
|
||||||
SECTION .rodata data
|
|
||||||
%else
|
|
||||||
SECTION .rodata align=16
|
|
||||||
%endif
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
SubMbScanIdx:
|
|
||||||
dd 0x0, 0x1, 0x4, 0x5,
|
|
||||||
dd 0x2, 0x3, 0x6, 0x7,
|
|
||||||
dd 0x8, 0x9, 0xc, 0xd,
|
|
||||||
dd 0xa, 0xb, 0xe, 0xf,
|
|
||||||
dd 0x10, 0x11, 0x14, 0x15,
|
|
||||||
dd 0x12, 0x13, 0x16, 0x17,
|
|
||||||
|
|
||||||
;*******************************************************************************
|
;*******************************************************************************
|
||||||
; Code
|
; Code
|
||||||
;*******************************************************************************
|
;*******************************************************************************
|
||||||
@@ -312,71 +56,77 @@ ALIGN 16
|
|||||||
; void_t WelsResBlockZero16x16_sse2(int16_t* pBlock,int32_t iStride)
|
; void_t WelsResBlockZero16x16_sse2(int16_t* pBlock,int32_t iStride)
|
||||||
;*******************************************************************************
|
;*******************************************************************************
|
||||||
WelsResBlockZero16x16_sse2:
|
WelsResBlockZero16x16_sse2:
|
||||||
push esi
|
;push r0
|
||||||
|
%assign push_num 0
|
||||||
mov esi, [esp+08h]
|
LOAD_2_PARA
|
||||||
mov ecx, [esp+0ch]
|
%ifndef X86_32
|
||||||
lea ecx, [ecx*2]
|
movsx r1, r1d
|
||||||
lea eax, [ecx*3]
|
%endif
|
||||||
|
;mov r0, [esp+08h]
|
||||||
|
;mov r1, [esp+0ch]
|
||||||
|
;lea r1, [r1*2]
|
||||||
|
lea r1, [r1*2]
|
||||||
|
;lea r2, [r1*3]
|
||||||
|
lea r2, [r1*3]
|
||||||
|
|
||||||
pxor xmm7, xmm7
|
pxor xmm7, xmm7
|
||||||
|
|
||||||
; four lines
|
; four lines
|
||||||
movdqa [esi], xmm7
|
movdqa [r0], xmm7
|
||||||
movdqa [esi+10h], xmm7
|
movdqa [r0+10h], xmm7
|
||||||
|
|
||||||
movdqa [esi+ecx], xmm7
|
movdqa [r0+r1], xmm7
|
||||||
movdqa [esi+ecx+10h], xmm7
|
movdqa [r0+r1+10h], xmm7
|
||||||
|
|
||||||
movdqa [esi+ecx*2], xmm7
|
movdqa [r0+r1*2], xmm7
|
||||||
movdqa [esi+ecx*2+10h], xmm7
|
movdqa [r0+r1*2+10h], xmm7
|
||||||
|
|
||||||
movdqa [esi+eax], xmm7
|
movdqa [r0+r2], xmm7
|
||||||
movdqa [esi+eax+10h], xmm7
|
movdqa [r0+r2+10h], xmm7
|
||||||
|
|
||||||
; four lines
|
; four lines
|
||||||
lea esi, [esi+ecx*4]
|
lea r0, [r0+r1*4]
|
||||||
movdqa [esi], xmm7
|
movdqa [r0], xmm7
|
||||||
movdqa [esi+10h], xmm7
|
movdqa [r0+10h], xmm7
|
||||||
|
|
||||||
movdqa [esi+ecx], xmm7
|
movdqa [r0+r1], xmm7
|
||||||
movdqa [esi+ecx+10h], xmm7
|
movdqa [r0+r1+10h], xmm7
|
||||||
|
|
||||||
movdqa [esi+ecx*2], xmm7
|
movdqa [r0+r1*2], xmm7
|
||||||
movdqa [esi+ecx*2+10h], xmm7
|
movdqa [r0+r1*2+10h], xmm7
|
||||||
|
|
||||||
movdqa [esi+eax], xmm7
|
movdqa [r0+r2], xmm7
|
||||||
movdqa [esi+eax+10h], xmm7
|
movdqa [r0+r2+10h], xmm7
|
||||||
|
|
||||||
; four lines
|
; four lines
|
||||||
lea esi, [esi+ecx*4]
|
lea r0, [r0+r1*4]
|
||||||
movdqa [esi], xmm7
|
movdqa [r0], xmm7
|
||||||
movdqa [esi+10h], xmm7
|
movdqa [r0+10h], xmm7
|
||||||
|
|
||||||
movdqa [esi+ecx], xmm7
|
movdqa [r0+r1], xmm7
|
||||||
movdqa [esi+ecx+10h], xmm7
|
movdqa [r0+r1+10h], xmm7
|
||||||
|
|
||||||
movdqa [esi+ecx*2], xmm7
|
movdqa [r0+r1*2], xmm7
|
||||||
movdqa [esi+ecx*2+10h], xmm7
|
movdqa [r0+r1*2+10h], xmm7
|
||||||
|
|
||||||
movdqa [esi+eax], xmm7
|
movdqa [r0+r2], xmm7
|
||||||
movdqa [esi+eax+10h], xmm7
|
movdqa [r0+r2+10h], xmm7
|
||||||
|
|
||||||
; four lines
|
; four lines
|
||||||
lea esi, [esi+ecx*4]
|
lea r0, [r0+r1*4]
|
||||||
movdqa [esi], xmm7
|
movdqa [r0], xmm7
|
||||||
movdqa [esi+10h], xmm7
|
movdqa [r0+10h], xmm7
|
||||||
|
|
||||||
movdqa [esi+ecx], xmm7
|
movdqa [r0+r1], xmm7
|
||||||
movdqa [esi+ecx+10h], xmm7
|
movdqa [r0+r1+10h], xmm7
|
||||||
|
|
||||||
movdqa [esi+ecx*2], xmm7
|
movdqa [r0+r1*2], xmm7
|
||||||
movdqa [esi+ecx*2+10h], xmm7
|
movdqa [r0+r1*2+10h], xmm7
|
||||||
|
|
||||||
movdqa [esi+eax], xmm7
|
movdqa [r0+r2], xmm7
|
||||||
movdqa [esi+eax+10h], xmm7
|
movdqa [r0+r2+10h], xmm7
|
||||||
|
|
||||||
pop esi
|
;pop r0
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
|
||||||
@@ -387,27 +137,31 @@ ALIGN 16
|
|||||||
; void_t WelsResBlockZero8x8_sse2(int16_t * pBlock, int32_t iStride)
|
; void_t WelsResBlockZero8x8_sse2(int16_t * pBlock, int32_t iStride)
|
||||||
;*******************************************************************************
|
;*******************************************************************************
|
||||||
WelsResBlockZero8x8_sse2:
|
WelsResBlockZero8x8_sse2:
|
||||||
push esi
|
;push r0
|
||||||
|
%assign push_num 0
|
||||||
mov esi, [esp+08h]
|
LOAD_2_PARA
|
||||||
mov ecx, [esp+0ch]
|
%ifndef X86_32
|
||||||
lea ecx, [ecx*2]
|
movsx r1, r1d
|
||||||
lea eax, [ecx*3]
|
%endif
|
||||||
|
;mov r0, [esp+08h]
|
||||||
|
;mov r1, [esp+0ch]
|
||||||
|
lea r1, [r1*2]
|
||||||
|
lea r2, [r1*3]
|
||||||
|
|
||||||
pxor xmm7, xmm7
|
pxor xmm7, xmm7
|
||||||
|
|
||||||
movdqa [esi], xmm7
|
movdqa [r0], xmm7
|
||||||
movdqa [esi+ecx], xmm7
|
movdqa [r0+r1], xmm7
|
||||||
movdqa [esi+ecx*2], xmm7
|
movdqa [r0+r1*2], xmm7
|
||||||
movdqa [esi+eax], xmm7
|
movdqa [r0+r2], xmm7
|
||||||
|
|
||||||
lea esi, [esi+ecx*4]
|
lea r0, [r0+r1*4]
|
||||||
movdqa [esi], xmm7
|
movdqa [r0], xmm7
|
||||||
movdqa [esi+ecx], xmm7
|
movdqa [r0+r1], xmm7
|
||||||
movdqa [esi+ecx*2], xmm7
|
movdqa [r0+r1*2], xmm7
|
||||||
movdqa [esi+eax], xmm7
|
movdqa [r0+r2], xmm7
|
||||||
|
|
||||||
|
|
||||||
pop esi
|
;pop r0
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
|||||||
@@ -1,169 +0,0 @@
|
|||||||
;*!
|
|
||||||
;* \copy
|
|
||||||
;* Copyright (c) 2009-2013, Cisco Systems
|
|
||||||
;* All rights reserved.
|
|
||||||
;*
|
|
||||||
;* Redistribution and use in source and binary forms, with or without
|
|
||||||
;* modification, are permitted provided that the following conditions
|
|
||||||
;* are met:
|
|
||||||
;*
|
|
||||||
;* * Redistributions of source code must retain the above copyright
|
|
||||||
;* notice, this list of conditions and the following disclaimer.
|
|
||||||
;*
|
|
||||||
;* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
;* notice, this list of conditions and the following disclaimer in
|
|
||||||
;* the documentation and/or other materials provided with the
|
|
||||||
;* distribution.
|
|
||||||
;*
|
|
||||||
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
||||||
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
|
||||||
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
;* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
;*
|
|
||||||
;*
|
|
||||||
;* cpu_mmx.asm
|
|
||||||
;*
|
|
||||||
;* Abstract
|
|
||||||
;* verify cpuid feature support and cpuid detection
|
|
||||||
;*
|
|
||||||
;* History
|
|
||||||
;* 04/29/2009 Created
|
|
||||||
;*
|
|
||||||
;*************************************************************************/
|
|
||||||
|
|
||||||
bits 32
|
|
||||||
|
|
||||||
;******************************************************************************************
|
|
||||||
; Macros
|
|
||||||
;******************************************************************************************
|
|
||||||
|
|
||||||
%macro WELS_EXTERN 1
|
|
||||||
%ifdef PREFIX
|
|
||||||
global _%1
|
|
||||||
%define %1 _%1
|
|
||||||
%else
|
|
||||||
global %1
|
|
||||||
%endif
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
;******************************************************************************************
|
|
||||||
; Code
|
|
||||||
;******************************************************************************************
|
|
||||||
|
|
||||||
SECTION .text
|
|
||||||
|
|
||||||
; refer to "The IA-32 Intel(R) Architecture Software Developers Manual, Volume 2A A-M"
|
|
||||||
; section CPUID - CPU Identification
|
|
||||||
|
|
||||||
WELS_EXTERN WelsCPUIdVerify
|
|
||||||
ALIGN 16
|
|
||||||
;******************************************************************************************
|
|
||||||
; int32_t WelsCPUIdVerify()
|
|
||||||
;******************************************************************************************
|
|
||||||
WelsCPUIdVerify:
|
|
||||||
pushfd ; decrease the SP by 4 and load EFLAGS register onto stack, pushfd 32 bit and pushf for 16 bit
|
|
||||||
pushfd ; need push 2 EFLAGS, one for processing and the another one for storing purpose
|
|
||||||
pop ecx ; get EFLAGS to bit manipulation
|
|
||||||
mov eax, ecx ; store into ecx followed
|
|
||||||
xor eax, 00200000h ; get ID flag (bit 21) of EFLAGS to directly indicate cpuid support or not
|
|
||||||
xor eax, ecx ; get the ID flag bitwise, eax - 0: not support; otherwise: support
|
|
||||||
popfd ; store back EFLAGS and keep unchanged for system
|
|
||||||
ret
|
|
||||||
|
|
||||||
WELS_EXTERN WelsCPUId
|
|
||||||
ALIGN 16
|
|
||||||
;****************************************************************************************************
|
|
||||||
; void WelsCPUId( int32_t index, int32_t *uiFeatureA, int32_t *uiFeatureB, int32_t *uiFeatureC, int32_t *uiFeatureD )
|
|
||||||
;****************************************************************************************************
|
|
||||||
WelsCPUId:
|
|
||||||
push ebx
|
|
||||||
push edi
|
|
||||||
|
|
||||||
mov eax, [esp+12] ; operating index
|
|
||||||
cpuid ; cpuid
|
|
||||||
|
|
||||||
; processing various information return
|
|
||||||
mov edi, [esp+16]
|
|
||||||
mov [edi], eax
|
|
||||||
mov edi, [esp+20]
|
|
||||||
mov [edi], ebx
|
|
||||||
mov edi, [esp+24]
|
|
||||||
mov [edi], ecx
|
|
||||||
mov edi, [esp+28]
|
|
||||||
mov [edi], edx
|
|
||||||
|
|
||||||
pop edi
|
|
||||||
pop ebx
|
|
||||||
ret
|
|
||||||
|
|
||||||
WELS_EXTERN WelsCPUSupportAVX
|
|
||||||
; need call after cpuid=1 and eax, ecx flag got then
|
|
||||||
ALIGN 16
|
|
||||||
;****************************************************************************************************
|
|
||||||
; int32_t WelsCPUSupportAVX( uint32_t eax, uint32_t ecx )
|
|
||||||
;****************************************************************************************************
|
|
||||||
WelsCPUSupportAVX:
|
|
||||||
mov eax, [esp+4]
|
|
||||||
mov ecx, [esp+8]
|
|
||||||
|
|
||||||
; refer to detection of AVX addressed in INTEL AVX manual document
|
|
||||||
and ecx, 018000000H
|
|
||||||
cmp ecx, 018000000H ; check both OSXSAVE and AVX feature flags
|
|
||||||
jne avx_not_supported
|
|
||||||
; processor supports AVX instructions and XGETBV is enabled by OS
|
|
||||||
mov ecx, 0 ; specify 0 for XFEATURE_ENABLED_MASK register
|
|
||||||
XGETBV ; result in EDX:EAX
|
|
||||||
and eax, 06H
|
|
||||||
cmp eax, 06H ; check OS has enabled both XMM and YMM state support
|
|
||||||
jne avx_not_supported
|
|
||||||
mov eax, 1
|
|
||||||
ret
|
|
||||||
avx_not_supported:
|
|
||||||
mov eax, 0
|
|
||||||
ret
|
|
||||||
|
|
||||||
WELS_EXTERN WelsCPUSupportFMA
|
|
||||||
; need call after cpuid=1 and eax, ecx flag got then
|
|
||||||
ALIGN 16
|
|
||||||
;****************************************************************************************************
|
|
||||||
; int32_t WelsCPUSupportFMA( uint32_t eax, uint32_t ecx )
|
|
||||||
;****************************************************************************************************
|
|
||||||
WelsCPUSupportFMA:
|
|
||||||
mov eax, [esp+4]
|
|
||||||
mov ecx, [esp+8]
|
|
||||||
|
|
||||||
; refer to detection of FMA addressed in INTEL AVX manual document
|
|
||||||
and ecx, 018001000H
|
|
||||||
cmp ecx, 018001000H ; check OSXSAVE, AVX, FMA feature flags
|
|
||||||
jne fma_not_supported
|
|
||||||
; processor supports AVX,FMA instructions and XGETBV is enabled by OS
|
|
||||||
mov ecx, 0 ; specify 0 for XFEATURE_ENABLED_MASK register
|
|
||||||
XGETBV ; result in EDX:EAX
|
|
||||||
and eax, 06H
|
|
||||||
cmp eax, 06H ; check OS has enabled both XMM and YMM state support
|
|
||||||
jne fma_not_supported
|
|
||||||
mov eax, 1
|
|
||||||
ret
|
|
||||||
fma_not_supported:
|
|
||||||
mov eax, 0
|
|
||||||
ret
|
|
||||||
|
|
||||||
WELS_EXTERN WelsEmms
|
|
||||||
ALIGN 16
|
|
||||||
;******************************************************************************************
|
|
||||||
; void WelsEmms()
|
|
||||||
;******************************************************************************************
|
|
||||||
WelsEmms:
|
|
||||||
emms ; empty mmx technology states
|
|
||||||
ret
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -42,8 +42,6 @@
|
|||||||
|
|
||||||
%include "asm_inc.asm"
|
%include "asm_inc.asm"
|
||||||
|
|
||||||
BITS 32
|
|
||||||
|
|
||||||
;*******************************************************************************
|
;*******************************************************************************
|
||||||
; Macros and other preprocessor constants
|
; Macros and other preprocessor constants
|
||||||
;*******************************************************************************
|
;*******************************************************************************
|
||||||
@@ -93,19 +91,15 @@ ALIGN 16
|
|||||||
;*******************************************************************************
|
;*******************************************************************************
|
||||||
|
|
||||||
IdctResAddPred_mmx:
|
IdctResAddPred_mmx:
|
||||||
|
%assign push_num 0
|
||||||
%define pushsize 0
|
LOAD_3_PARA
|
||||||
%define pPred esp+pushsize+4
|
%ifndef X86_32
|
||||||
%define kiStride esp+pushsize+8
|
movsx r1, r1d
|
||||||
%define pRs esp+pushsize+12
|
%endif
|
||||||
|
movq mm0, [r2+ 0]
|
||||||
mov eax, [pRs ]
|
movq mm1, [r2+ 8]
|
||||||
mov edx, [pPred ]
|
movq mm2, [r2+16]
|
||||||
mov ecx, [kiStride]
|
movq mm3, [r2+24]
|
||||||
movq mm0, [eax+ 0]
|
|
||||||
movq mm1, [eax+ 8]
|
|
||||||
movq mm2, [eax+16]
|
|
||||||
movq mm3, [eax+24]
|
|
||||||
|
|
||||||
MMX_Trans4x4W mm0, mm1, mm2, mm3, mm4
|
MMX_Trans4x4W mm0, mm1, mm2, mm3, mm4
|
||||||
MMX_IDCT mm1, mm2, mm3, mm4, mm0, mm6
|
MMX_IDCT mm1, mm2, mm3, mm4, mm0, mm6
|
||||||
@@ -115,15 +109,12 @@ IdctResAddPred_mmx:
|
|||||||
WELS_Zero mm7
|
WELS_Zero mm7
|
||||||
WELS_DW32 mm6
|
WELS_DW32 mm6
|
||||||
|
|
||||||
MMX_StoreDiff4P mm3, mm0, mm6, mm7, [edx]
|
MMX_StoreDiff4P mm3, mm0, mm6, mm7, [r0]
|
||||||
MMX_StoreDiff4P mm4, mm0, mm6, mm7, [edx+ecx]
|
MMX_StoreDiff4P mm4, mm0, mm6, mm7, [r0+r1]
|
||||||
lea edx, [edx+2*ecx]
|
lea r0, [r0+2*r1]
|
||||||
MMX_StoreDiff4P mm1, mm0, mm6, mm7, [edx]
|
MMX_StoreDiff4P mm1, mm0, mm6, mm7, [r0]
|
||||||
MMX_StoreDiff4P mm2, mm0, mm6, mm7, [edx+ecx]
|
MMX_StoreDiff4P mm2, mm0, mm6, mm7, [r0+r1]
|
||||||
|
|
||||||
|
|
||||||
%undef pushsize
|
|
||||||
%undef pPred
|
|
||||||
%undef kiStride
|
|
||||||
%undef pRs
|
|
||||||
emms
|
emms
|
||||||
ret
|
ret
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -1,655 +0,0 @@
|
|||||||
;*!
|
|
||||||
;* \copy
|
|
||||||
;* Copyright (c) 2009-2013, Cisco Systems
|
|
||||||
;* All rights reserved.
|
|
||||||
;*
|
|
||||||
;* Redistribution and use in source and binary forms, with or without
|
|
||||||
;* modification, are permitted provided that the following conditions
|
|
||||||
;* are met:
|
|
||||||
;*
|
|
||||||
;* * Redistributions of source code must retain the above copyright
|
|
||||||
;* notice, this list of conditions and the following disclaimer.
|
|
||||||
;*
|
|
||||||
;* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
;* notice, this list of conditions and the following disclaimer in
|
|
||||||
;* the documentation and/or other materials provided with the
|
|
||||||
;* distribution.
|
|
||||||
;*
|
|
||||||
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
||||||
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
|
||||||
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
;* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
;*
|
|
||||||
;*
|
|
||||||
;* expand_picture.asm
|
|
||||||
;*
|
|
||||||
;* Abstract
|
|
||||||
;* mmxext/sse for expand_frame
|
|
||||||
;*
|
|
||||||
;* History
|
|
||||||
;* 09/25/2009 Created
|
|
||||||
;*
|
|
||||||
;*
|
|
||||||
;*************************************************************************/
|
|
||||||
|
|
||||||
%include "asm_inc.asm"
|
|
||||||
|
|
||||||
BITS 32
|
|
||||||
|
|
||||||
;***********************************************************************
|
|
||||||
; Macros and other preprocessor constants
|
|
||||||
;***********************************************************************
|
|
||||||
|
|
||||||
;***********************************************************************
|
|
||||||
; Local Data (Read Only)
|
|
||||||
;***********************************************************************
|
|
||||||
|
|
||||||
;SECTION .rodata pData align=16
|
|
||||||
|
|
||||||
;***********************************************************************
|
|
||||||
; Various memory constants (trigonometric values or rounding values)
|
|
||||||
;***********************************************************************
|
|
||||||
;%define PADDING_SIZE_ASM 32 ; PADDING_LENGTH
|
|
||||||
|
|
||||||
;***********************************************************************
|
|
||||||
; Code
|
|
||||||
;***********************************************************************
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
SECTION .text
|
|
||||||
|
|
||||||
;WELS_EXTERN expand_picture_luma_mmx
|
|
||||||
;WELS_EXTERN expand_picture_chroma_mmx
|
|
||||||
WELS_EXTERN ExpandPictureLuma_sse2
|
|
||||||
WELS_EXTERN ExpandPictureChromaAlign_sse2 ; for chroma alignment
|
|
||||||
WELS_EXTERN ExpandPictureChromaUnalign_sse2 ; for chroma unalignment
|
|
||||||
|
|
||||||
;;;;;;;expanding result;;;;;;;
|
|
||||||
|
|
||||||
;aaaa|attttttttttttttttb|bbbb
|
|
||||||
;aaaa|attttttttttttttttb|bbbb
|
|
||||||
;aaaa|attttttttttttttttb|bbbb
|
|
||||||
;aaaa|attttttttttttttttb|bbbb
|
|
||||||
;----------------------------
|
|
||||||
;aaaa|attttttttttttttttb|bbbb
|
|
||||||
;llll|l r|rrrr
|
|
||||||
;llll|l r|rrrr
|
|
||||||
;llll|l r|rrrr
|
|
||||||
;llll|l r|rrrr
|
|
||||||
;llll|l r|rrrr
|
|
||||||
;cccc|ceeeeeeeeeeeeeeeed|dddd
|
|
||||||
;----------------------------
|
|
||||||
;cccc|ceeeeeeeeeeeeeeeed|dddd
|
|
||||||
;cccc|ceeeeeeeeeeeeeeeed|dddd
|
|
||||||
;cccc|ceeeeeeeeeeeeeeeed|dddd
|
|
||||||
;cccc|ceeeeeeeeeeeeeeeed|dddd
|
|
||||||
|
|
||||||
%macro mov_line_8x4_mmx 3 ; dst, stride, mm?
|
|
||||||
movq [%1], %3
|
|
||||||
movq [%1+%2], %3
|
|
||||||
lea %1, [%1+2*%2]
|
|
||||||
movq [%1], %3
|
|
||||||
movq [%1+%2], %3
|
|
||||||
lea %1, [%1+2*%2]
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
%macro mov_line_end8x4_mmx 3 ; dst, stride, mm?
|
|
||||||
movq [%1], %3
|
|
||||||
movq [%1+%2], %3
|
|
||||||
lea %1, [%1+2*%2]
|
|
||||||
movq [%1], %3
|
|
||||||
movq [%1+%2], %3
|
|
||||||
lea %1, [%1+%2]
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
%macro mov_line_16x4_sse2 4 ; dst, stride, xmm?, u/a
|
|
||||||
movdq%4 [%1], %3 ; top(bottom)_0
|
|
||||||
movdq%4 [%1+%2], %3 ; top(bottom)_1
|
|
||||||
lea %1, [%1+2*%2]
|
|
||||||
movdq%4 [%1], %3 ; top(bottom)_2
|
|
||||||
movdq%4 [%1+%2], %3 ; top(bottom)_3
|
|
||||||
lea %1, [%1+2*%2]
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
%macro mov_line_end16x4_sse2 4 ; dst, stride, xmm?, u/a
|
|
||||||
movdq%4 [%1], %3 ; top(bottom)_0
|
|
||||||
movdq%4 [%1+%2], %3 ; top(bottom)_1
|
|
||||||
lea %1, [%1+2*%2]
|
|
||||||
movdq%4 [%1], %3 ; top(bottom)_2
|
|
||||||
movdq%4 [%1+%2], %3 ; top(bottom)_3
|
|
||||||
lea %1, [%1+%2]
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
%macro mov_line_32x4_sse2 3 ; dst, stride, xmm?
|
|
||||||
movdqa [%1], %3 ; top(bottom)_0
|
|
||||||
movdqa [%1+16], %3 ; top(bottom)_0
|
|
||||||
movdqa [%1+%2], %3 ; top(bottom)_1
|
|
||||||
movdqa [%1+%2+16], %3 ; top(bottom)_1
|
|
||||||
lea %1, [%1+2*%2]
|
|
||||||
movdqa [%1], %3 ; top(bottom)_2
|
|
||||||
movdqa [%1+16], %3 ; top(bottom)_2
|
|
||||||
movdqa [%1+%2], %3 ; top(bottom)_3
|
|
||||||
movdqa [%1+%2+16], %3 ; top(bottom)_3
|
|
||||||
lea %1, [%1+2*%2]
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
%macro mov_line_end32x4_sse2 3 ; dst, stride, xmm?
|
|
||||||
movdqa [%1], %3 ; top(bottom)_0
|
|
||||||
movdqa [%1+16], %3 ; top(bottom)_0
|
|
||||||
movdqa [%1+%2], %3 ; top(bottom)_1
|
|
||||||
movdqa [%1+%2+16], %3 ; top(bottom)_1
|
|
||||||
lea %1, [%1+2*%2]
|
|
||||||
movdqa [%1], %3 ; top(bottom)_2
|
|
||||||
movdqa [%1+16], %3 ; top(bottom)_2
|
|
||||||
movdqa [%1+%2], %3 ; top(bottom)_3
|
|
||||||
movdqa [%1+%2+16], %3 ; top(bottom)_3
|
|
||||||
lea %1, [%1+%2]
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
%macro exp_top_bottom_sse2 1 ; iPaddingSize [luma(32)/chroma(16)]
|
|
||||||
; ebx [width/16(8)]
|
|
||||||
; esi [pSrc+0], edi [pSrc-1], ecx [-stride], 32(16) ; top
|
|
||||||
; eax [pSrc+(h-1)*stride], ebp [pSrc+(h+31)*stride], 32(16) ; bottom
|
|
||||||
|
|
||||||
%if %1 == 32 ; for luma
|
|
||||||
sar ebx, 04h ; width / 16(8) pixels
|
|
||||||
.top_bottom_loops:
|
|
||||||
; top
|
|
||||||
movdqa xmm0, [esi] ; first line of picture pData
|
|
||||||
mov_line_16x4_sse2 edi, ecx, xmm0, a ; dst, stride, xmm?
|
|
||||||
mov_line_16x4_sse2 edi, ecx, xmm0, a
|
|
||||||
mov_line_16x4_sse2 edi, ecx, xmm0, a
|
|
||||||
mov_line_16x4_sse2 edi, ecx, xmm0, a
|
|
||||||
mov_line_16x4_sse2 edi, ecx, xmm0, a ; dst, stride, xmm?
|
|
||||||
mov_line_16x4_sse2 edi, ecx, xmm0, a
|
|
||||||
mov_line_16x4_sse2 edi, ecx, xmm0, a
|
|
||||||
mov_line_end16x4_sse2 edi, ecx, xmm0, a
|
|
||||||
|
|
||||||
; bottom
|
|
||||||
movdqa xmm1, [eax] ; last line of picture pData
|
|
||||||
mov_line_16x4_sse2 ebp, ecx, xmm1, a ; dst, stride, xmm?
|
|
||||||
mov_line_16x4_sse2 ebp, ecx, xmm1, a
|
|
||||||
mov_line_16x4_sse2 ebp, ecx, xmm1, a
|
|
||||||
mov_line_16x4_sse2 ebp, ecx, xmm1, a
|
|
||||||
mov_line_16x4_sse2 ebp, ecx, xmm1, a ; dst, stride, xmm?
|
|
||||||
mov_line_16x4_sse2 ebp, ecx, xmm1, a
|
|
||||||
mov_line_16x4_sse2 ebp, ecx, xmm1, a
|
|
||||||
mov_line_end16x4_sse2 ebp, ecx, xmm1, a
|
|
||||||
|
|
||||||
lea esi, [esi+16] ; top pSrc
|
|
||||||
lea edi, [edi+16] ; top dst
|
|
||||||
lea eax, [eax+16] ; bottom pSrc
|
|
||||||
lea ebp, [ebp+16] ; bottom dst
|
|
||||||
neg ecx ; positive/negative stride need for next loop?
|
|
||||||
|
|
||||||
dec ebx
|
|
||||||
jnz near .top_bottom_loops
|
|
||||||
%elif %1 == 16 ; for chroma ??
|
|
||||||
mov edx, ebx
|
|
||||||
sar ebx, 04h ; (width / 16) pixels
|
|
||||||
.top_bottom_loops:
|
|
||||||
; top
|
|
||||||
movdqa xmm0, [esi] ; first line of picture pData
|
|
||||||
mov_line_16x4_sse2 edi, ecx, xmm0, a ; dst, stride, xmm?
|
|
||||||
mov_line_16x4_sse2 edi, ecx, xmm0, a
|
|
||||||
mov_line_16x4_sse2 edi, ecx, xmm0, a
|
|
||||||
mov_line_end16x4_sse2 edi, ecx, xmm0, a
|
|
||||||
|
|
||||||
; bottom
|
|
||||||
movdqa xmm1, [eax] ; last line of picture pData
|
|
||||||
mov_line_16x4_sse2 ebp, ecx, xmm1, a ; dst, stride, xmm?
|
|
||||||
mov_line_16x4_sse2 ebp, ecx, xmm1, a
|
|
||||||
mov_line_16x4_sse2 ebp, ecx, xmm1, a
|
|
||||||
mov_line_end16x4_sse2 ebp, ecx, xmm1, a
|
|
||||||
|
|
||||||
lea esi, [esi+16] ; top pSrc
|
|
||||||
lea edi, [edi+16] ; top dst
|
|
||||||
lea eax, [eax+16] ; bottom pSrc
|
|
||||||
lea ebp, [ebp+16] ; bottom dst
|
|
||||||
neg ecx ; positive/negative stride need for next loop?
|
|
||||||
|
|
||||||
dec ebx
|
|
||||||
jnz near .top_bottom_loops
|
|
||||||
|
|
||||||
; for remaining 8 bytes
|
|
||||||
and edx, 0fh ; any 8 bytes left?
|
|
||||||
test edx, edx
|
|
||||||
jz near .to_be_continued ; no left to exit here
|
|
||||||
|
|
||||||
; top
|
|
||||||
movq mm0, [esi] ; remained 8 byte
|
|
||||||
mov_line_8x4_mmx edi, ecx, mm0 ; dst, stride, mm?
|
|
||||||
mov_line_8x4_mmx edi, ecx, mm0 ; dst, stride, mm?
|
|
||||||
mov_line_8x4_mmx edi, ecx, mm0 ; dst, stride, mm?
|
|
||||||
mov_line_end8x4_mmx edi, ecx, mm0 ; dst, stride, mm?
|
|
||||||
; bottom
|
|
||||||
movq mm1, [eax]
|
|
||||||
mov_line_8x4_mmx ebp, ecx, mm1 ; dst, stride, mm?
|
|
||||||
mov_line_8x4_mmx ebp, ecx, mm1 ; dst, stride, mm?
|
|
||||||
mov_line_8x4_mmx ebp, ecx, mm1 ; dst, stride, mm?
|
|
||||||
mov_line_end8x4_mmx ebp, ecx, mm1 ; dst, stride, mm?
|
|
||||||
WELSEMMS
|
|
||||||
|
|
||||||
.to_be_continued:
|
|
||||||
%endif
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
%macro exp_left_right_sse2 2 ; iPaddingSize [luma(32)/chroma(16)], u/a
|
|
||||||
; ecx [height]
|
|
||||||
; esi [pSrc+0], edi [pSrc-32], edx [stride], 32(16) ; left
|
|
||||||
; ebx [pSrc+(w-1)], ebp [pSrc+w], 32(16) ; right
|
|
||||||
; xor eax, eax ; for pixel pData (uint8_t) ; make sure eax=0 at least high 24 bits of eax = 0
|
|
||||||
|
|
||||||
%if %1 == 32 ; for luma
|
|
||||||
.left_right_loops:
|
|
||||||
; left
|
|
||||||
mov al, byte [esi] ; pixel pData for left border
|
|
||||||
butterfly_1to16_sse xmm0, xmm1, a ; dst, tmp, pSrc [generic register name: a/b/c/d]
|
|
||||||
movdqa [edi], xmm0
|
|
||||||
movdqa [edi+16], xmm0
|
|
||||||
|
|
||||||
; right
|
|
||||||
mov al, byte [ebx]
|
|
||||||
butterfly_1to16_sse xmm1, xmm2, a ; dst, tmp, pSrc [generic register name: a/b/c/d]
|
|
||||||
movdqa [ebp], xmm1
|
|
||||||
movdqa [ebp+16], xmm1
|
|
||||||
|
|
||||||
lea esi, [esi+edx] ; left pSrc
|
|
||||||
lea edi, [edi+edx] ; left dst
|
|
||||||
lea ebx, [ebx+edx] ; right pSrc
|
|
||||||
lea ebp, [ebp+edx] ; right dst
|
|
||||||
|
|
||||||
dec ecx
|
|
||||||
jnz near .left_right_loops
|
|
||||||
%elif %1 == 16 ; for chroma ??
|
|
||||||
.left_right_loops:
|
|
||||||
; left
|
|
||||||
mov al, byte [esi] ; pixel pData for left border
|
|
||||||
butterfly_1to16_sse xmm0, xmm1, a ; dst, tmp, pSrc [generic register name: a/b/c/d]
|
|
||||||
movdqa [edi], xmm0
|
|
||||||
|
|
||||||
; right
|
|
||||||
mov al, byte [ebx]
|
|
||||||
butterfly_1to16_sse xmm1, xmm2, a ; dst, tmp, pSrc [generic register name: a/b/c/d]
|
|
||||||
movdq%2 [ebp], xmm1 ; might not be aligned 16 bytes in case chroma planes
|
|
||||||
|
|
||||||
lea esi, [esi+edx] ; left pSrc
|
|
||||||
lea edi, [edi+edx] ; left dst
|
|
||||||
lea ebx, [ebx+edx] ; right pSrc
|
|
||||||
lea ebp, [ebp+edx] ; right dst
|
|
||||||
|
|
||||||
dec ecx
|
|
||||||
jnz near .left_right_loops
|
|
||||||
%endif
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
%macro exp_cross_sse2 2 ; iPaddingSize [luma(32)/chroma(16)], u/a
|
|
||||||
; top-left: (x)mm3, top-right: (x)mm4, bottom-left: (x)mm5, bottom-right: (x)mm6
|
|
||||||
; edi: TL, ebp: TR, eax: BL, ebx: BR, ecx, -stride
|
|
||||||
%if %1 == 32 ; luma
|
|
||||||
; TL
|
|
||||||
mov_line_32x4_sse2 edi, ecx, xmm3 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 edi, ecx, xmm3 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 edi, ecx, xmm3 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 edi, ecx, xmm3 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 edi, ecx, xmm3 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 edi, ecx, xmm3 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 edi, ecx, xmm3 ; dst, stride, xmm?
|
|
||||||
mov_line_end32x4_sse2 edi, ecx, xmm3 ; dst, stride, xmm?
|
|
||||||
|
|
||||||
; TR
|
|
||||||
mov_line_32x4_sse2 ebp, ecx, xmm4 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 ebp, ecx, xmm4 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 ebp, ecx, xmm4 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 ebp, ecx, xmm4 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 ebp, ecx, xmm4 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 ebp, ecx, xmm4 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 ebp, ecx, xmm4 ; dst, stride, xmm?
|
|
||||||
mov_line_end32x4_sse2 ebp, ecx, xmm4 ; dst, stride, xmm?
|
|
||||||
|
|
||||||
; BL
|
|
||||||
mov_line_32x4_sse2 eax, ecx, xmm5 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 eax, ecx, xmm5 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 eax, ecx, xmm5 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 eax, ecx, xmm5 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 eax, ecx, xmm5 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 eax, ecx, xmm5 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 eax, ecx, xmm5 ; dst, stride, xmm?
|
|
||||||
mov_line_end32x4_sse2 eax, ecx, xmm5 ; dst, stride, xmm?
|
|
||||||
|
|
||||||
; BR
|
|
||||||
mov_line_32x4_sse2 ebx, ecx, xmm6 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 ebx, ecx, xmm6 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 ebx, ecx, xmm6 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 ebx, ecx, xmm6 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 ebx, ecx, xmm6 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 ebx, ecx, xmm6 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 ebx, ecx, xmm6 ; dst, stride, xmm?
|
|
||||||
mov_line_end32x4_sse2 ebx, ecx, xmm6 ; dst, stride, xmm?
|
|
||||||
%elif %1 == 16 ; chroma
|
|
||||||
; TL
|
|
||||||
mov_line_16x4_sse2 edi, ecx, xmm3, a ; dst, stride, xmm?
|
|
||||||
mov_line_16x4_sse2 edi, ecx, xmm3, a ; dst, stride, xmm?
|
|
||||||
mov_line_16x4_sse2 edi, ecx, xmm3, a ; dst, stride, xmm?
|
|
||||||
mov_line_end16x4_sse2 edi, ecx, xmm3, a ; dst, stride, xmm?
|
|
||||||
|
|
||||||
; TR
|
|
||||||
mov_line_16x4_sse2 ebp, ecx, xmm4, %2 ; dst, stride, xmm?
|
|
||||||
mov_line_16x4_sse2 ebp, ecx, xmm4, %2 ; dst, stride, xmm?
|
|
||||||
mov_line_16x4_sse2 ebp, ecx, xmm4, %2 ; dst, stride, xmm?
|
|
||||||
mov_line_end16x4_sse2 ebp, ecx, xmm4, %2 ; dst, stride, xmm?
|
|
||||||
|
|
||||||
; BL
|
|
||||||
mov_line_16x4_sse2 eax, ecx, xmm5, a ; dst, stride, xmm?
|
|
||||||
mov_line_16x4_sse2 eax, ecx, xmm5, a ; dst, stride, xmm?
|
|
||||||
mov_line_16x4_sse2 eax, ecx, xmm5, a ; dst, stride, xmm?
|
|
||||||
mov_line_end16x4_sse2 eax, ecx, xmm5, a ; dst, stride, xmm?
|
|
||||||
|
|
||||||
; BR
|
|
||||||
mov_line_16x4_sse2 ebx, ecx, xmm6, %2 ; dst, stride, xmm?
|
|
||||||
mov_line_16x4_sse2 ebx, ecx, xmm6, %2 ; dst, stride, xmm?
|
|
||||||
mov_line_16x4_sse2 ebx, ecx, xmm6, %2 ; dst, stride, xmm?
|
|
||||||
mov_line_end16x4_sse2 ebx, ecx, xmm6, %2 ; dst, stride, xmm?
|
|
||||||
%endif
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
;***********************************************************************----------------
|
|
||||||
; void ExpandPictureLuma_sse2( uint8_t *pDst,
|
|
||||||
; const int32_t kiStride,
|
|
||||||
; const int32_t kiWidth,
|
|
||||||
; const int32_t kiHeight );
|
|
||||||
;***********************************************************************----------------
|
|
||||||
;-----------------------------------------------------------------------
; ExpandPictureLuma_sse2( pDst, kiStride, kiWidth, kiHeight )
; 32-bit stack arguments; after the five pushes below they live at
;   [esp+20+4] pDst   [esp+20+8] kiStride
;   [esp+20+12] kiWidth   [esp+20+16] kiHeight
; The actual stores are done by butterfly_1to16_sse / exp_*_sse2, macros
; defined outside this block; this routine only prepares the registers
; they read. Corner fill values: xmm3 top-left, xmm4 top-right,
; xmm5 bottom-left, xmm6 bottom-right.
; Preserves ebx, edx, esi, edi, ebp. eax and ecx are overwritten.
;-----------------------------------------------------------------------
ExpandPictureLuma_sse2:
        push    ebx
        push    edx
        push    esi
        push    edi
        push    ebp

        ;---- stage 1: top and bottom borders ---------------------------
        mov     eax, [esp + 20 + 16]            ; kiHeight
        mov     ebx, [esp + 20 + 12]            ; kiWidth
        mov     edx, [esp + 20 + 8]             ; kiStride
        mov     esi, [esp + 20 + 4]             ; pDst
        mov     cl, byte [esi]                  ; top-left pixel
        butterfly_1to16_sse xmm3, xmm4, c       ; xmm3 = top-left fill, xmm4 = tmp

        dec     eax                             ; kiHeight - 1
        imul    eax, edx                        ; (kiHeight - 1) * kiStride
        lea     eax, [esi + eax]                ; last line of picture data
        mov     ecx, edx
        neg     ecx                             ; -kiStride
        lea     edi, [esi + ecx]                ; last line of top border
        shl     edx, 5                          ; 32 * kiStride
        lea     ebp, [eax + edx]                ; last line of bottom border
        dec     ebx                             ; kiWidth - 1
        lea     ebx, [eax + ebx]                ; bottom-right pixel address
        mov     dl, byte [eax]                  ; bottom-left pixel
        butterfly_1to16_sse xmm5, xmm6, d       ; xmm5 = bottom-left fill, xmm6 = tmp
        mov     dl, byte [ebx]                  ; bottom-right pixel
        butterfly_1to16_sse xmm6, xmm4, d       ; xmm6 = bottom-right fill, xmm4 = tmp
        mov     ebx, [esp + 20 + 12]            ; kiWidth again for the macro
        exp_top_bottom_sse2 32

        ;---- stage 2: left and right borders ---------------------------
        mov     ecx, [esp + 20 + 16]            ; kiHeight
        mov     edx, [esp + 20 + 8]             ; kiStride
        mov     ebx, [esp + 20 + 12]            ; kiWidth
        mov     esi, [esp + 20 + 4]             ; pDst = left border source
        lea     edi, [esi - 32]                 ; left border destination (luma = -32)
        dec     ebx
        lea     ebx, [esi + ebx]                ; right border source (pDst + width - 1)
        lea     ebp, [ebx + 1]                  ; right border destination
        mov     eax, -32                        ; same eax upper bits as before the byte load
        mov     al, byte [ebx]                  ; top-right pixel
        butterfly_1to16_sse xmm4, xmm0, a       ; xmm4 = top-right fill, xmm0 = tmp
        exp_left_right_sse2 32, a

        ;---- stage 3: the four corner blocks ---------------------------
        ; xmm3..xmm6 were initialised in the stages above.
        mov     esi, [esp + 20 + 4]             ; pDst
        mov     ebx, [esp + 20 + 12]            ; kiWidth
        mov     edx, [esp + 20 + 16]            ; kiHeight
        mov     ecx, [esp + 20 + 8]             ; kiStride
        add     edx, 32                         ; kiHeight + 32
        imul    edx, ecx                        ; (kiHeight + 32) * kiStride
        neg     ecx                             ; -kiStride
        lea     edi, [esi + ecx - 32]           ; last line of top-left border
        lea     ebp, [edi + ebx + 32]           ; last line of top-right border (pDst + width - stride)
        lea     eax, [edi + edx]                ; last line of bottom-left border
        lea     ebx, [ebp + edx]                ; last line of bottom-right border
        exp_cross_sse2 32, a

        ; sfence                                ; commit cache write back memory

        pop     ebp
        pop     edi
        pop     esi
        pop     edx
        pop     ebx

        ret
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
;***********************************************************************----------------
|
|
||||||
; void ExpandPictureChromaAlign_sse2( uint8_t *pDst,
|
|
||||||
; const int32_t kiStride,
|
|
||||||
; const int32_t kiWidth,
|
|
||||||
; const int32_t kiHeight );
|
|
||||||
;***********************************************************************----------------
|
|
||||||
;-----------------------------------------------------------------------
; ExpandPictureChromaAlign_sse2( pDst, kiStride, kiWidth, kiHeight )
; 32-bit stack arguments; after the five pushes below they live at
;   [esp+20+4] pDst   [esp+20+8] kiStride
;   [esp+20+12] kiWidth   [esp+20+16] kiHeight
; Chroma variant: border size 16. The stores are done by
; butterfly_1to16_sse / exp_*_sse2, macros defined outside this block;
; the trailing "a" macro argument is passed through unchanged
; (presumably selects aligned stores -- confirm against the macro).
; Corner fill values: xmm3 top-left, xmm4 top-right,
; xmm5 bottom-left, xmm6 bottom-right.
; Preserves ebx, edx, esi, edi, ebp. eax and ecx are overwritten.
;-----------------------------------------------------------------------
ExpandPictureChromaAlign_sse2:
        push    ebx
        push    edx
        push    esi
        push    edi
        push    ebp

        ;---- stage 1: top and bottom borders ---------------------------
        mov     eax, [esp + 20 + 16]            ; kiHeight
        mov     ebx, [esp + 20 + 12]            ; kiWidth
        mov     edx, [esp + 20 + 8]             ; kiStride
        mov     esi, [esp + 20 + 4]             ; pDst
        mov     cl, byte [esi]                  ; top-left pixel
        butterfly_1to16_sse xmm3, xmm4, c       ; xmm3 = top-left fill, xmm4 = tmp

        dec     eax                             ; kiHeight - 1
        imul    eax, edx                        ; (kiHeight - 1) * kiStride
        lea     eax, [esi + eax]                ; last line of picture data
        mov     ecx, edx
        neg     ecx                             ; -kiStride
        lea     edi, [esi + ecx]                ; last line of top border
        shl     edx, 4                          ; 16 * kiStride
        lea     ebp, [eax + edx]                ; last line of bottom border
        dec     ebx                             ; kiWidth - 1
        lea     ebx, [eax + ebx]                ; bottom-right pixel address
        mov     dl, byte [eax]                  ; bottom-left pixel
        butterfly_1to16_sse xmm5, xmm6, d       ; xmm5 = bottom-left fill, xmm6 = tmp
        mov     dl, byte [ebx]                  ; bottom-right pixel
        butterfly_1to16_sse xmm6, xmm4, d       ; xmm6 = bottom-right fill, xmm4 = tmp
        mov     ebx, [esp + 20 + 12]            ; kiWidth again for the macro
        exp_top_bottom_sse2 16

        ;---- stage 2: left and right borders ---------------------------
        mov     ecx, [esp + 20 + 16]            ; kiHeight
        mov     edx, [esp + 20 + 8]             ; kiStride
        mov     ebx, [esp + 20 + 12]            ; kiWidth
        mov     esi, [esp + 20 + 4]             ; pDst = left border source
        lea     edi, [esi - 16]                 ; left border destination (chroma = -16)
        dec     ebx
        lea     ebx, [esi + ebx]                ; right border source (pDst + width - 1)
        lea     ebp, [ebx + 1]                  ; right border destination
        mov     eax, -16                        ; same eax upper bits as before the byte load
        mov     al, byte [ebx]                  ; top-right pixel
        butterfly_1to16_sse xmm4, xmm0, a       ; xmm4 = top-right fill, xmm0 = tmp
        exp_left_right_sse2 16, a

        ;---- stage 3: the four corner blocks ---------------------------
        ; xmm3..xmm6 were initialised in the stages above.
        mov     esi, [esp + 20 + 4]             ; pDst
        mov     ebx, [esp + 20 + 12]            ; kiWidth
        mov     edx, [esp + 20 + 16]            ; kiHeight
        mov     ecx, [esp + 20 + 8]             ; kiStride
        add     edx, 16                         ; kiHeight + 16
        imul    edx, ecx                        ; (kiHeight + 16) * kiStride
        neg     ecx                             ; -kiStride
        lea     edi, [esi + ecx - 16]           ; last line of top-left border
        lea     ebp, [edi + ebx + 16]           ; last line of top-right border (pDst + width - stride)
        lea     eax, [edi + edx]                ; last line of bottom-left border
        lea     ebx, [ebp + edx]                ; last line of bottom-right border
        exp_cross_sse2 16, a

        ; sfence                                ; commit cache write back memory

        pop     ebp
        pop     edi
        pop     esi
        pop     edx
        pop     ebx

        ret
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
;***********************************************************************----------------
|
|
||||||
; void ExpandPictureChromaUnalign_sse2( uint8_t *pDst,
|
|
||||||
; const int32_t kiStride,
|
|
||||||
; const int32_t kiWidth,
|
|
||||||
; const int32_t kiHeight );
|
|
||||||
;***********************************************************************----------------
|
|
||||||
;-----------------------------------------------------------------------
; ExpandPictureChromaUnalign_sse2( pDst, kiStride, kiWidth, kiHeight )
; 32-bit stack arguments; after the five pushes below they live at
;   [esp+20+4] pDst   [esp+20+8] kiStride
;   [esp+20+12] kiWidth   [esp+20+16] kiHeight
; Chroma variant: border size 16. The stores are done by
; butterfly_1to16_sse / exp_*_sse2, macros defined outside this block;
; the trailing "u" macro argument is passed through unchanged
; (presumably selects unaligned stores -- confirm against the macro).
; Corner fill values: xmm3 top-left, xmm4 top-right,
; xmm5 bottom-left, xmm6 bottom-right.
; Preserves ebx, edx, esi, edi, ebp. eax and ecx are overwritten.
;-----------------------------------------------------------------------
ExpandPictureChromaUnalign_sse2:
        push    ebx
        push    edx
        push    esi
        push    edi
        push    ebp

        ;---- stage 1: top and bottom borders ---------------------------
        mov     eax, [esp + 20 + 16]            ; kiHeight
        mov     ebx, [esp + 20 + 12]            ; kiWidth
        mov     edx, [esp + 20 + 8]             ; kiStride
        mov     esi, [esp + 20 + 4]             ; pDst
        mov     cl, byte [esi]                  ; top-left pixel
        butterfly_1to16_sse xmm3, xmm4, c       ; xmm3 = top-left fill, xmm4 = tmp

        dec     eax                             ; kiHeight - 1
        imul    eax, edx                        ; (kiHeight - 1) * kiStride
        lea     eax, [esi + eax]                ; last line of picture data
        mov     ecx, edx
        neg     ecx                             ; -kiStride
        lea     edi, [esi + ecx]                ; last line of top border
        shl     edx, 4                          ; 16 * kiStride
        lea     ebp, [eax + edx]                ; last line of bottom border
        dec     ebx                             ; kiWidth - 1
        lea     ebx, [eax + ebx]                ; bottom-right pixel address
        mov     dl, byte [eax]                  ; bottom-left pixel
        butterfly_1to16_sse xmm5, xmm6, d       ; xmm5 = bottom-left fill, xmm6 = tmp
        mov     dl, byte [ebx]                  ; bottom-right pixel
        butterfly_1to16_sse xmm6, xmm4, d       ; xmm6 = bottom-right fill, xmm4 = tmp
        mov     ebx, [esp + 20 + 12]            ; kiWidth again for the macro
        exp_top_bottom_sse2 16

        ;---- stage 2: left and right borders ---------------------------
        mov     ecx, [esp + 20 + 16]            ; kiHeight
        mov     edx, [esp + 20 + 8]             ; kiStride
        mov     ebx, [esp + 20 + 12]            ; kiWidth
        mov     esi, [esp + 20 + 4]             ; pDst = left border source
        lea     edi, [esi - 16]                 ; left border destination (chroma = -16)
        dec     ebx
        lea     ebx, [esi + ebx]                ; right border source (pDst + width - 1)
        lea     ebp, [ebx + 1]                  ; right border destination
        mov     eax, -16                        ; same eax upper bits as before the byte load
        mov     al, byte [ebx]                  ; top-right pixel
        butterfly_1to16_sse xmm4, xmm0, a       ; xmm4 = top-right fill, xmm0 = tmp
        exp_left_right_sse2 16, u

        ;---- stage 3: the four corner blocks ---------------------------
        ; xmm3..xmm6 were initialised in the stages above.
        mov     esi, [esp + 20 + 4]             ; pDst
        mov     ebx, [esp + 20 + 12]            ; kiWidth
        mov     edx, [esp + 20 + 16]            ; kiHeight
        mov     ecx, [esp + 20 + 8]             ; kiStride
        add     edx, 16                         ; kiHeight + 16
        imul    edx, ecx                        ; (kiHeight + 16) * kiStride
        neg     ecx                             ; -kiStride
        lea     edi, [esi + ecx - 16]           ; last line of top-left border
        lea     ebp, [edi + ebx + 16]           ; last line of top-right border (pDst + width - stride)
        lea     eax, [edi + edx]                ; last line of bottom-left border
        lea     ebx, [ebp + edx]                ; last line of bottom-right border
        exp_cross_sse2 16, u

        ; sfence                                ; commit cache write back memory

        pop     ebp
        pop     edi
        pop     esi
        pop     edx
        pop     ebx

        ret
|
|
||||||
|
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -1,330 +0,0 @@
|
|||||||
;*!
|
|
||||||
;* \copy
|
|
||||||
;* Copyright (c) 2009-2013, Cisco Systems
|
|
||||||
;* All rights reserved.
|
|
||||||
;*
|
|
||||||
;* Redistribution and use in source and binary forms, with or without
|
|
||||||
;* modification, are permitted provided that the following conditions
|
|
||||||
;* are met:
|
|
||||||
;*
|
|
||||||
;* * Redistributions of source code must retain the above copyright
|
|
||||||
;* notice, this list of conditions and the following disclaimer.
|
|
||||||
;*
|
|
||||||
;* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
;* notice, this list of conditions and the following disclaimer in
|
|
||||||
;* the documentation and/or other materials provided with the
|
|
||||||
;* distribution.
|
|
||||||
;*
|
|
||||||
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
||||||
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
|
||||||
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
;* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
;*
|
|
||||||
;*
|
|
||||||
;* mb_copy.asm
|
|
||||||
;*
|
|
||||||
;* Abstract
|
|
||||||
;* mb_copy and mb_copy1
|
|
||||||
;*
|
|
||||||
;* History
|
|
||||||
;* 15/09/2009 Created
|
|
||||||
;* 12/28/2009 Modified with larger throughput
|
|
||||||
;* 12/29/2011 Tuned WelsCopy16x16NotAligned_sse2, added UpdateMbMv_sse2 WelsCopy16x8NotAligned_sse2,
|
|
||||||
;* WelsCopy16x8_mmx, WelsCopy8x16_mmx etc;
|
|
||||||
;*
|
|
||||||
;*
|
|
||||||
;*********************************************************************************************/
|
|
||||||
%include "asm_inc.asm"
|
|
||||||
BITS 32
|
|
||||||
|
|
||||||
;*******************************************************************************
|
|
||||||
; Macros and other preprocessor constants
|
|
||||||
;*******************************************************************************
|
|
||||||
|
|
||||||
;*******************************************************************************
|
|
||||||
; Local Data (Read Only)
|
|
||||||
;*******************************************************************************
|
|
||||||
|
|
||||||
;SECTION .rodata data align=16
|
|
||||||
|
|
||||||
;*******************************************************************************
|
|
||||||
; Various memory constants (trigonometric values or rounding values)
|
|
||||||
;*******************************************************************************
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
|
|
||||||
;*******************************************************************************
|
|
||||||
; Code
|
|
||||||
;*******************************************************************************
|
|
||||||
|
|
||||||
SECTION .text
|
|
||||||
|
|
||||||
WELS_EXTERN PixelAvgWidthEq4_mmx
|
|
||||||
WELS_EXTERN PixelAvgWidthEq8_mmx
|
|
||||||
WELS_EXTERN PixelAvgWidthEq16_sse2
|
|
||||||
|
|
||||||
WELS_EXTERN McCopyWidthEq4_mmx
|
|
||||||
WELS_EXTERN McCopyWidthEq8_mmx
|
|
||||||
WELS_EXTERN McCopyWidthEq16_sse2
|
|
||||||
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
;*******************************************************************************
|
|
||||||
; void_t PixelAvgWidthEq4_mmx( uint8_t *pDst, int iDstStride,
|
|
||||||
; uint8_t *pSrcA, int iSrcAStride,
|
|
||||||
; uint8_t *pSrcB, int iSrcBStride,
|
|
||||||
; int iHeight );
|
|
||||||
;*******************************************************************************
|
|
||||||
;-----------------------------------------------------------------------
; PixelAvgWidthEq4_mmx -- pDst[x] = (pSrcA[x] + pSrcB[x] + 1) >> 1 for a
; 4-pixel-wide block, one row per loop iteration.
; In (stack, after four pushes):
;   [esp+20] pDst        [esp+24] iDstStride
;   [esp+28] pSrcA       [esp+32] iSrcAStride
;   [esp+36] pSrcB       [esp+40] iSrcBStride
;   [esp+44] iHeight     (must be > 0: the loop tests after the first row)
; Clobbers: eax, ecx, edx, mm0, mm1, flags. Preserves esi, edi, ebp, ebx.
;-----------------------------------------------------------------------
PixelAvgWidthEq4_mmx:

        push    esi
        push    edi
        push    ebp
        push    ebx

        mov     edi, [esp+20]           ; pDst
        mov     eax, [esp+24]           ; iDstStride
        mov     esi, [esp+28]           ; pSrcA
        mov     ecx, [esp+32]           ; iSrcAStride
        mov     ebp, [esp+36]           ; pSrcB
        mov     edx, [esp+40]           ; iSrcBStride
        mov     ebx, [esp+44]           ; iHeight
ALIGN 4
.height_loop:
        movd    mm0, [ebp]              ; 4 pixels of pSrcB
        ; Load pSrcA with movd as well: "pavgb mm0, [esi]" would fetch a
        ; full 8 bytes and read 4 bytes past the end of the 4-pixel row.
        movd    mm1, [esi]              ; 4 pixels of pSrcA
        pavgb   mm0, mm1                ; rounded byte average
        movd    [edi], mm0

        dec     ebx                     ; sets ZF; the lea's below leave flags alone
        lea     edi, [edi+eax]
        lea     esi, [esi+ecx]
        lea     ebp, [ebp+edx]
        jne     .height_loop

        WELSEMMS                        ; project macro; presumably issues emms
        pop     ebx
        pop     ebp
        pop     edi
        pop     esi
        ret
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
;*******************************************************************************
|
|
||||||
; void_t PixelAvgWidthEq8_mmx( uint8_t *pDst, int iDstStride,
|
|
||||||
; uint8_t *pSrcA, int iSrcAStride,
|
|
||||||
; uint8_t *pSrcB, int iSrcBStride,
|
|
||||||
; int iHeight );
|
|
||||||
;*******************************************************************************
|
|
||||||
;-----------------------------------------------------------------------
; PixelAvgWidthEq8_mmx -- rounded byte average of two 8-pixel-wide
; blocks, two rows per loop iteration.
; In (stack, after four pushes):
;   [esp+20] pDst        [esp+24] iDstStride
;   [esp+28] pSrcA       [esp+32] iSrcAStride
;   [esp+36] pSrcB       [esp+40] iSrcBStride
;   [esp+44] iHeight     (loop ends only when the count reaches exactly 0,
;                         so it must be a positive multiple of 2)
; Clobbers: eax, ecx, edx, mm0, flags. Preserves esi, edi, ebp, ebx.
;-----------------------------------------------------------------------
PixelAvgWidthEq8_mmx:

        push    esi
        push    edi
        push    ebp
        push    ebx

        mov     ebx, [esp+44]           ; rows remaining (iHeight)
        mov     edx, [esp+40]           ; iSrcBStride
        mov     ebp, [esp+36]           ; pSrcB
        mov     ecx, [esp+32]           ; iSrcAStride
        mov     esi, [esp+28]           ; pSrcA
        mov     eax, [esp+24]           ; iDstStride
        mov     edi, [esp+20]           ; pDst
ALIGN 4
.height_loop:
        ; row 0
        movq    mm0, [esi]
        pavgb   mm0, [ebp]
        movq    [edi], mm0
        ; row 1
        movq    mm0, [esi+ecx]
        pavgb   mm0, [ebp+edx]
        movq    [edi+eax], mm0

        sub     ebx, 2                  ; flags survive the lea's below
        lea     esi, [esi+ecx*2]
        lea     ebp, [ebp+edx*2]
        lea     edi, [edi+eax*2]
        jnz     .height_loop

        WELSEMMS                        ; project macro; presumably issues emms
        pop     ebx
        pop     ebp
        pop     edi
        pop     esi
        ret
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
;*******************************************************************************
|
|
||||||
; void_t PixelAvgWidthEq16_sse2( uint8_t *pDst, int iDstStride,
|
|
||||||
; uint8_t *pSrcA, int iSrcAStride,
|
|
||||||
; uint8_t *pSrcB, int iSrcBStride,
|
|
||||||
; int iHeight );
|
|
||||||
;*******************************************************************************
|
|
||||||
;-----------------------------------------------------------------------
; PixelAvgWidthEq16_sse2 -- rounded byte average of two 16-pixel-wide
; blocks, four rows per loop iteration.
; In (stack, after four pushes):
;   [esp+20] pDst        [esp+24] iDstStride
;   [esp+28] pSrcA       [esp+32] iSrcAStride
;   [esp+36] pSrcB       [esp+40] iSrcBStride
;   [esp+44] iHeight     (loop ends only when the count reaches exactly 0,
;                         so it must be a positive multiple of 4)
; None of the three pointers needs to be 16-byte aligned: pSrcB is loaded
; with movdqu into xmm1 instead of being used as a pavgb memory operand,
; which would fault on an unaligned address.
; Clobbers: eax, ecx, edx, xmm0, xmm1, flags.
; Preserves esi, edi, ebp, ebx.
;-----------------------------------------------------------------------
PixelAvgWidthEq16_sse2:
        push    esi
        push    edi
        push    ebp
        push    ebx

        mov     edi, [esp+20]           ; pDst
        mov     eax, [esp+24]           ; iDstStride
        mov     esi, [esp+28]           ; pSrcA
        mov     ecx, [esp+32]           ; iSrcAStride
        mov     ebp, [esp+36]           ; pSrcB
        mov     edx, [esp+40]           ; iSrcBStride
        mov     ebx, [esp+44]           ; iHeight
ALIGN 4
.height_loop:
        ; rows 0 and 1
        movdqu  xmm0, [esi]
        movdqu  xmm1, [ebp]
        pavgb   xmm0, xmm1
        movdqu  [edi], xmm0

        movdqu  xmm0, [esi+ecx]
        movdqu  xmm1, [ebp+edx]
        pavgb   xmm0, xmm1
        movdqu  [edi+eax], xmm0

        lea     esi, [esi+2*ecx]
        lea     ebp, [ebp+2*edx]
        lea     edi, [edi+2*eax]

        ; rows 2 and 3
        movdqu  xmm0, [esi]
        movdqu  xmm1, [ebp]
        pavgb   xmm0, xmm1
        movdqu  [edi], xmm0

        movdqu  xmm0, [esi+ecx]
        movdqu  xmm1, [ebp+edx]
        pavgb   xmm0, xmm1
        movdqu  [edi+eax], xmm0

        lea     esi, [esi+2*ecx]
        lea     ebp, [ebp+2*edx]
        lea     edi, [edi+2*eax]

        sub     ebx, 4
        jne     .height_loop

        WELSEMMS                        ; kept from the original; no MMX register is used here
        pop     ebx
        pop     ebp
        pop     edi
        pop     esi

        ret
|
|
||||||
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
;*******************************************************************************
|
|
||||||
; void_t McCopyWidthEq4_mmx( uint8_t *pSrc, int iSrcStride,
|
|
||||||
; uint8_t *pDst, int iDstStride, int iHeight )
|
|
||||||
;*******************************************************************************
|
|
||||||
;-----------------------------------------------------------------------
; McCopyWidthEq4_mmx -- copy a 4-byte-wide block, one row per iteration,
; through a general-purpose register (ebx).
; In (stack, after three pushes):
;   [esp+16] pSrc   [esp+20] iSrcStride
;   [esp+24] pDst   [esp+28] iDstStride
;   [esp+32] iHeight (must be > 0: the loop tests after the first row)
; Clobbers: eax, ecx, edx, flags. Preserves esi, edi, ebx.
;-----------------------------------------------------------------------
McCopyWidthEq4_mmx:
        push    esi
        push    edi
        push    ebx

        mov     edx, [esp+32]           ; rows remaining (iHeight)
        mov     ecx, [esp+28]           ; iDstStride
        mov     edi, [esp+24]           ; pDst
        mov     eax, [esp+20]           ; iSrcStride
        mov     esi, [esp+16]           ; pSrc
ALIGN 4
.height_loop:
        mov     ebx, [esi]              ; 4 source bytes
        mov     [edi], ebx

        lea     esi, [esi+eax]          ; next source row
        lea     edi, [edi+ecx]          ; next destination row
        dec     edx
        jnz     .height_loop
        WELSEMMS                        ; project macro; presumably issues emms
        pop     ebx
        pop     edi
        pop     esi
        ret
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
;*******************************************************************************
|
|
||||||
; void_t McCopyWidthEq8_mmx( uint8_t *pSrc, int iSrcStride,
|
|
||||||
; uint8_t *pDst, int iDstStride, int iHeight )
|
|
||||||
;*******************************************************************************
|
|
||||||
;-----------------------------------------------------------------------
; McCopyWidthEq8_mmx -- copy an 8-byte-wide block, one row per iteration,
; through mm0.
; In (stack, after two pushes):
;   [esp+12] pSrc   [esp+16] iSrcStride
;   [esp+20] pDst   [esp+24] iDstStride
;   [esp+28] iHeight (must be > 0: the loop tests after the first row)
; Clobbers: eax, ecx, edx, mm0, flags. Preserves esi, edi.
;-----------------------------------------------------------------------
McCopyWidthEq8_mmx:
        push    esi
        push    edi
        mov     edx, [esp+28]           ; rows remaining (iHeight)
        mov     ecx, [esp+24]           ; iDstStride
        mov     edi, [esp+20]           ; pDst
        mov     eax, [esp+16]           ; iSrcStride
        mov     esi, [esp+12]           ; pSrc

ALIGN 4
.height_loop:
        movq    mm0, [esi]              ; 8 source bytes
        movq    [edi], mm0
        lea     esi, [esi+eax]          ; next source row
        lea     edi, [edi+ecx]          ; next destination row
        dec     edx
        jnz     .height_loop

        WELSEMMS                        ; project macro; presumably issues emms
        pop     edi
        pop     esi
        ret
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
;*******************************************************************************
|
|
||||||
; void_t McCopyWidthEq16_sse2( uint8_t *pSrc, int iSrcStride, uint8_t *pDst, int iDstStride, int iHeight )
|
|
||||||
;*******************************************************************************
|
|
||||||
;read unaligned memory
|
|
||||||
%macro SSE_READ_UNA 2
        ; %1 = destination xmm register
        ; %2 = address expression, written without the square brackets
        ; Loads 16 bytes as two 8-byte halves, so no alignment is required.
        movq    %1, [%2]                ; bytes 0..7  -> low quadword
        movhps  %1, [%2+8]              ; bytes 8..15 -> high quadword
%endmacro
|
|
||||||
|
|
||||||
;write unaligned memory
|
|
||||||
%macro SSE_WRITE_UNA 2
        ; %1 = address expression, written without the square brackets
        ; %2 = source xmm register
        ; Stores 16 bytes as two 8-byte halves, so no alignment is required.
        movq    [%1], %2                ; low quadword  -> bytes 0..7
        movhps  [%1+8], %2              ; high quadword -> bytes 8..15
%endmacro
|
|
||||||
;-----------------------------------------------------------------------
; McCopyWidthEq16_sse2 -- copy a 16-byte-wide block, two rows per loop
; iteration, using the SSE_READ_UNA / SSE_WRITE_UNA macros. Both rows are
; read before either is written.
; In (stack, after two pushes):
;   [esp+12] pSrc   [esp+16] iSrcStride
;   [esp+20] pDst   [esp+24] iDstStride
;   [esp+28] iHeight (loop ends only when the count reaches exactly 0,
;                     so it must be a positive multiple of 2)
; Clobbers: eax, ecx, edx, xmm0, xmm1, flags. Preserves esi, edi.
;-----------------------------------------------------------------------
McCopyWidthEq16_sse2:
        push    esi
        push    edi

        mov     ecx, [esp+28]           ; rows remaining (iHeight)
        mov     edx, [esp+24]           ; iDstStride
        mov     edi, [esp+20]           ; pDst
        mov     eax, [esp+16]           ; iSrcStride
        mov     esi, [esp+12]           ; pSrc

ALIGN 4
.height_loop:
        SSE_READ_UNA    xmm0, esi       ; source row 0
        SSE_READ_UNA    xmm1, esi+eax   ; source row 1
        SSE_WRITE_UNA   edi, xmm0       ; destination row 0
        SSE_WRITE_UNA   edi+edx, xmm1   ; destination row 1

        sub     ecx, 2                  ; flags survive the lea's below
        lea     esi, [esi+2*eax]
        lea     edi, [edi+2*edx]
        jnz     .height_loop

        pop     edi
        pop     esi
        ret
|
|
||||||
@@ -1,317 +0,0 @@
|
|||||||
;*!
|
|
||||||
;* \copy
|
|
||||||
;* Copyright (c) 2004-2013, Cisco Systems
|
|
||||||
;* All rights reserved.
|
|
||||||
;*
|
|
||||||
;* Redistribution and use in source and binary forms, with or without
|
|
||||||
;* modification, are permitted provided that the following conditions
|
|
||||||
;* are met:
|
|
||||||
;*
|
|
||||||
;* * Redistributions of source code must retain the above copyright
|
|
||||||
;* notice, this list of conditions and the following disclaimer.
|
|
||||||
;*
|
|
||||||
;* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
;* notice, this list of conditions and the following disclaimer in
|
|
||||||
;* the documentation and/or other materials provided with the
|
|
||||||
;* distribution.
|
|
||||||
;*
|
|
||||||
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
||||||
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
|
||||||
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
;* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
;*
|
|
||||||
;*
|
|
||||||
;* mc_chroma.asm
|
|
||||||
;*
|
|
||||||
;* Abstract
|
|
||||||
;* mmx motion compensation for chroma
|
|
||||||
;*
|
|
||||||
;* History
|
|
||||||
;* 10/13/2004 Created
|
|
||||||
;*
|
|
||||||
;*
|
|
||||||
;*************************************************************************/
|
|
||||||
%include "asm_inc.asm"
|
|
||||||
|
|
||||||
BITS 32
|
|
||||||
|
|
||||||
;***********************************************************************
|
|
||||||
; Local Data (Read Only)
|
|
||||||
;***********************************************************************
|
|
||||||
|
|
||||||
SECTION .rodata align=16
|
|
||||||
|
|
||||||
;***********************************************************************
|
|
||||||
; Various memory constants (trigonometric values or rounding values)
|
|
||||||
;***********************************************************************
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
h264_d0x20_sse2:
|
|
||||||
dw 32,32,32,32,32,32,32,32
|
|
||||||
ALIGN 16
|
|
||||||
h264_d0x20_mmx:
|
|
||||||
dw 32,32,32,32
|
|
||||||
|
|
||||||
|
|
||||||
;=============================================================================
|
|
||||||
; Code
|
|
||||||
;=============================================================================
|
|
||||||
|
|
||||||
SECTION .text
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
;*******************************************************************************
|
|
||||||
; void McChromaWidthEq4_mmx( uint8_t *src,
|
|
||||||
; int32_t iSrcStride,
|
|
||||||
; uint8_t *pDst,
|
|
||||||
; int32_t iDstStride,
|
|
||||||
; uint8_t *pABCD,
|
|
||||||
; int32_t iHeight );
|
|
||||||
;*******************************************************************************
|
|
||||||
WELS_EXTERN McChromaWidthEq4_mmx
|
|
||||||
;-----------------------------------------------------------------------
; McChromaWidthEq4_mmx -- 4-pixel-wide weighted blend of a 2x2 pixel
; neighbourhood, one output row per loop iteration:
;   dst[x] = (A*cur[x] + B*cur[x+1] + C*next[x] + D*next[x+1] + 32) >> 6
; where A,B,C,D are the four bytes at pABCD.
; In (stack, after three pushes):
;   [esp+16] pSrc   [esp+20] iSrcStride   [esp+24] pDst
;   [esp+28] iDstStride   [esp+32] pABCD  [esp+36] iHeight (> 0)
; Register roles inside the loop:
;   mm3..mm6 = A, B, C, D widened to four words each
;   mm0 / mm1 = cur[x] / cur[x+1] as words, carried between iterations
;   mm2 = next[x] as words, mm7 = zero (briefly reused as a stash)
; Clobbers: eax, ecx, edx, mm0-mm7, flags. Preserves esi, edi, ebx.
;-----------------------------------------------------------------------
McChromaWidthEq4_mmx:
        push    esi
        push    edi
        push    ebx

        ; ---- widen the four weights to words ----
        mov     eax, [esp + 32]         ; pABCD
        movd    mm3, [eax]              ; A B C D
        WELS_Zero mm7
        punpcklbw mm3, mm3              ; A A B B C C D D
        movq    mm4, mm3
        punpcklwd mm3, mm3              ; A A A A B B B B
        punpckhwd mm4, mm4              ; C C C C D D D D

        movq    mm5, mm3
        movq    mm6, mm4
        punpcklbw mm3, mm7              ; 4 x word A
        punpckhbw mm5, mm7              ; 4 x word B
        punpcklbw mm4, mm7              ; 4 x word C
        punpckhbw mm6, mm7              ; 4 x word D

        ; ---- arguments ----
        mov     ecx, [esp + 36]         ; rows remaining (iHeight)
        mov     edx, [esp + 28]         ; iDstStride
        mov     edi, [esp + 24]         ; pDst
        mov     eax, [esp + 20]         ; iSrcStride
        mov     esi, [esp + 16]         ; pSrc

        lea     ebx, [esi + eax]        ; ebx -> next source row
        movd    mm0, [esi]
        movd    mm1, [esi + 1]
        punpcklbw mm0, mm7              ; cur[x]   as words
        punpcklbw mm1, mm7              ; cur[x+1] as words
.xloop:

        pmullw  mm0, mm3                ; A * cur[x]
        pmullw  mm1, mm5                ; B * cur[x+1]
        paddw   mm0, mm1

        movd    mm1, [ebx]
        punpcklbw mm1, mm7
        movq    mm2, mm1                ; keep next[x] for the following row
        pmullw  mm1, mm4                ; C * next[x]
        paddw   mm0, mm1

        movd    mm1, [ebx + 1]
        punpcklbw mm1, mm7
        movq    mm7, mm1                ; stash next[x+1]; mm7 is re-zeroed below
        pmullw  mm1, mm6                ; D * next[x+1]
        paddw   mm0, mm1
        movq    mm1, mm7                ; next[x+1] becomes cur[x+1]

        paddw   mm0, [h264_d0x20_mmx]   ; + 32 (rounding)
        psrlw   mm0, 6

        WELS_Zero mm7
        packuswb mm0, mm7
        movd    [edi], mm0

        movq    mm0, mm2                ; next[x] becomes cur[x]

        dec     ecx                     ; flags survive the lea's below
        lea     edi, [edi + edx]
        lea     ebx, [ebx + eax]
        jnz     near .xloop
        WELSEMMS                        ; project macro; presumably issues emms
        pop     ebx
        pop     edi
        pop     esi
        ret
|
|
||||||
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
;*******************************************************************************
|
|
||||||
; void McChromaWidthEq8_sse2( uint8_t *pSrc,
|
|
||||||
; int32_t iSrcStride,
|
|
||||||
; uint8_t *pDst,
|
|
||||||
; int32_t iDstStride,
|
|
||||||
; uint8_t *pABCD,
|
|
||||||
; int32_t iHeight );
|
|
||||||
;*******************************************************************************
|
|
||||||
WELS_EXTERN McChromaWidthEq8_sse2
|
|
||||||
;-----------------------------------------------------------------------
; McChromaWidthEq8_sse2 -- 8-pixel-wide weighted blend of a 2x2 pixel
; neighbourhood, one output row per loop iteration:
;   dst[x] = (A*cur[x] + B*cur[x+1] + C*next[x] + D*next[x+1] + 32) >> 6
; where A,B,C,D are the four bytes at pABCD.
; In (stack, after three pushes):
;   [esp+16] pSrc   [esp+20] iSrcStride   [esp+24] pDst
;   [esp+28] iDstStride   [esp+32] pABCD  [esp+36] iHeight (> 0)
; Register roles inside the loop:
;   xmm3..xmm6 = A, B, C, D widened to eight words each
;   xmm0 / xmm1 = cur[x] / cur[x+1] as words, carried between iterations
;   xmm2 = next[x] as words, xmm7 = zero (briefly reused as a stash)
; Clobbers: eax, ecx, edx, xmm0-xmm7, flags. Preserves esi, edi, ebx.
;-----------------------------------------------------------------------
McChromaWidthEq8_sse2:
        push    esi
        push    edi
        push    ebx

        ; ---- widen the four weights to words ----
        mov     eax, [esp + 32]         ; pABCD
        movd    xmm3, [eax]             ; A B C D
        WELS_Zero xmm7
        punpcklbw xmm3, xmm3            ; A A B B C C D D
        punpcklwd xmm3, xmm3            ; A x4, B x4, C x4, D x4

        movdqa  xmm4, xmm3
        punpckldq xmm3, xmm3            ; A x8, B x8
        punpckhdq xmm4, xmm4            ; C x8, D x8
        movdqa  xmm5, xmm3
        movdqa  xmm6, xmm4

        punpcklbw xmm3, xmm7            ; 8 x word A
        punpckhbw xmm5, xmm7            ; 8 x word B
        punpcklbw xmm4, xmm7            ; 8 x word C
        punpckhbw xmm6, xmm7            ; 8 x word D

        ; ---- arguments ----
        mov     ecx, [esp + 36]         ; rows remaining (iHeight)
        mov     edx, [esp + 28]         ; iDstStride
        mov     edi, [esp + 24]         ; pDst
        mov     eax, [esp + 20]         ; iSrcStride
        mov     esi, [esp + 16]         ; pSrc

        lea     ebx, [esi + eax]        ; ebx -> next source row
        movq    xmm0, [esi]
        movq    xmm1, [esi + 1]
        punpcklbw xmm0, xmm7            ; cur[x]   as words
        punpcklbw xmm1, xmm7            ; cur[x+1] as words
.xloop:

        pmullw  xmm0, xmm3              ; A * cur[x]
        pmullw  xmm1, xmm5              ; B * cur[x+1]
        paddw   xmm0, xmm1

        movq    xmm1, [ebx]
        punpcklbw xmm1, xmm7
        movdqa  xmm2, xmm1              ; keep next[x] for the following row
        pmullw  xmm1, xmm4              ; C * next[x]
        paddw   xmm0, xmm1

        movq    xmm1, [ebx + 1]
        punpcklbw xmm1, xmm7
        movdqa  xmm7, xmm1              ; stash next[x+1]; xmm7 is re-zeroed below
        pmullw  xmm1, xmm6              ; D * next[x+1]
        paddw   xmm0, xmm1
        movdqa  xmm1, xmm7              ; next[x+1] becomes cur[x+1]

        paddw   xmm0, [h264_d0x20_sse2] ; + 32 (rounding)
        psrlw   xmm0, 6

        WELS_Zero xmm7
        packuswb xmm0, xmm7
        movq    [edi], xmm0

        movdqa  xmm0, xmm2              ; next[x] becomes cur[x]

        dec     ecx                     ; flags survive the lea's below
        lea     edi, [edi + edx]
        lea     ebx, [ebx + eax]
        jnz     near .xloop

        pop     ebx
        pop     edi
        pop     esi
        ret
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
;***********************************************************************
|
|
||||||
; void McChromaWidthEq8_ssse3( uint8_t *pSrc,
|
|
||||||
; int32_t iSrcStride,
|
|
||||||
; uint8_t *pDst,
|
|
||||||
; int32_t iDstStride,
|
|
||||||
; uint8_t *pABCD,
|
|
||||||
; int32_t iHeight);
|
|
||||||
;***********************************************************************
|
|
||||||
WELS_EXTERN McChromaWidthEq8_ssse3
|
|
||||||
;-----------------------------------------------------------------------
; McChromaWidthEq8_ssse3 -- 8-pixel-wide weighted blend of a 2x2 pixel
; neighbourhood, two output rows per loop iteration:
;   dst[x] = (A*cur[x] + B*cur[x+1] + C*next[x] + D*next[x+1] + 32) >> 6
; where A,B,C,D are the four bytes at pABCD.
; In (stack, after two pushes):
;   [esp+8+4]  pSrc        [esp+8+8]  iSrcStride
;   [esp+8+12] pDst        [esp+8+16] iDstStride
;   [esp+8+20] pABCD       [esp+8+24] iHeight
;   iHeight must be a positive multiple of 2 (loop ends only at exactly 0).
; Register roles inside the loop:
;   xmm5 = (A,B) byte pairs x8, xmm6 = (C,D) byte pairs x8, xmm7 = 8 x word 32
;   xmm0 = current row as interleaved bytes (p[x], p[x+1]) for pmaddubsw
; Each source row is fetched with two 8-byte loads (offsets 0 and 1), so
; exactly the 9 bytes the filter needs are read.
; Clobbers: eax, ecx, edx, xmm0-xmm7, flags. Preserves esi, edi.
;-----------------------------------------------------------------------
McChromaWidthEq8_ssse3:
        push    esi
        push    edi

        mov     eax, [esp + 8 + 20]     ; pABCD

        movd    xmm5, [eax]             ; A B C D
        punpcklwd xmm5, xmm5            ; AB AB CD CD
        punpckldq xmm5, xmm5            ; AB x4, CD x4
        movdqa  xmm6, xmm5
        punpcklqdq xmm5, xmm5           ; AB x8
        punpckhqdq xmm6, xmm6           ; CD x8

        mov     eax, [esp + 8 + 4]      ; pSrc
        mov     edx, [esp + 8 + 8]      ; iSrcStride
        mov     esi, [esp + 8 + 12]     ; pDst
        mov     edi, [esp + 8 + 16]     ; iDstStride
        mov     ecx, [esp + 8 + 24]     ; iHeight

        sub     esi, edi
        sub     esi, edi                ; pre-bias: loop adds 2*iDstStride first
        movdqa  xmm7, [h264_d0x20_sse2] ; rounding constant 32

        movq    xmm0, [eax]             ; row[0..7]
        movq    xmm1, [eax + 1]         ; row[1..8]
        punpcklbw xmm0, xmm1            ; (p[x], p[x+1]) pairs

.hloop_chroma:
        lea     esi, [esi + 2*edi]

        movq    xmm2, [eax + edx]       ; next row
        movq    xmm3, [eax + edx + 1]
        punpcklbw xmm2, xmm3
        movdqa  xmm4, xmm2              ; becomes "current" for the second output row

        pmaddubsw xmm0, xmm5            ; A*cur[x] + B*cur[x+1]
        pmaddubsw xmm2, xmm6            ; C*next[x] + D*next[x+1]
        paddw   xmm0, xmm2
        paddw   xmm0, xmm7
        psrlw   xmm0, 6
        packuswb xmm0, xmm0
        movq    [esi], xmm0

        lea     eax, [eax + 2*edx]
        movq    xmm2, [eax]             ; row after next
        movq    xmm3, [eax + 1]
        punpcklbw xmm2, xmm3
        movdqa  xmm0, xmm2              ; carried into the next iteration

        pmaddubsw xmm4, xmm5
        pmaddubsw xmm2, xmm6
        paddw   xmm4, xmm2
        paddw   xmm4, xmm7
        psrlw   xmm4, 6
        packuswb xmm4, xmm4
        movq    [esi + edi], xmm4

        sub     ecx, 2
        jnz     .hloop_chroma
        pop     edi
        pop     esi

        ret
|
|
||||||
|
|
||||||
|
|
||||||
@@ -1,615 +0,0 @@
|
|||||||
;*!
|
|
||||||
;* \copy
|
|
||||||
;* Copyright (c) 2009-2013, Cisco Systems
|
|
||||||
;* All rights reserved.
|
|
||||||
;*
|
|
||||||
;* Redistribution and use in source and binary forms, with or without
|
|
||||||
;* modification, are permitted provided that the following conditions
|
|
||||||
;* are met:
|
|
||||||
;*
|
|
||||||
;* * Redistributions of source code must retain the above copyright
|
|
||||||
;* notice, this list of conditions and the following disclaimer.
|
|
||||||
;*
|
|
||||||
;* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
;* notice, this list of conditions and the following disclaimer in
|
|
||||||
;* the documentation and/or other materials provided with the
|
|
||||||
;* distribution.
|
|
||||||
;*
|
|
||||||
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
||||||
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
|
||||||
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
;* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
;*
|
|
||||||
;*
|
|
||||||
;* mc_luma.asm
|
|
||||||
;*
|
|
||||||
;* Abstract
|
|
||||||
;* sse2 motion compensation
|
|
||||||
;*
|
|
||||||
;* History
|
|
||||||
;* 17/08/2009 Created
|
|
||||||
;*
|
|
||||||
;*
|
|
||||||
;*************************************************************************/
|
|
||||||
%include "asm_inc.asm"
|
|
||||||
|
|
||||||
BITS 32
|
|
||||||
|
|
||||||
;*******************************************************************************
|
|
||||||
; Local Data (Read Only)
|
|
||||||
;*******************************************************************************
|
|
||||||
|
|
||||||
SECTION .rodata align=16
|
|
||||||
|
|
||||||
;*******************************************************************************
|
|
||||||
; Various memory constants (trigonometric values or rounding values)
|
|
||||||
;*******************************************************************************
|
|
||||||
|
|
||||||
ALIGN 16
; Four words of 16: the rounding term added before the ">> 5" of the MMX
; 6-tap filter (loaded with a single 8-byte movq).
h264_w0x10:
    times 4 dw 16
|
|
||||||
|
|
||||||
|
|
||||||
;*******************************************************************************
|
|
||||||
; Code
|
|
||||||
;*******************************************************************************
|
|
||||||
|
|
||||||
SECTION .text
|
|
||||||
|
|
||||||
WELS_EXTERN McHorVer20WidthEq4_mmx
|
|
||||||
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
;*******************************************************************************
; void_t McHorVer20WidthEq4_mmx( uint8_t *pSrc,
;                                int iSrcStride,
;                                uint8_t *pDst,
;                                int iDstStride,
;                                int iHeight)
;
; Horizontal 6-tap filter, 4 output pixels per row (32-bit cdecl, MMX).
; With p = pSrc - 2, each output byte is
;   clip8((p[0] + p[5] - 5*(p[1] + p[4]) + 20*(p[2] + p[3]) + 16) >> 5)
; iHeight must be at least 1 (the row counter is tested after the first row).
; Clobbers eax, ecx, edx, mm0-mm7 and flags; esi/edi are preserved.
;*******************************************************************************
McHorVer20WidthEq4_mmx:
    push    esi
    push    edi

    mov     esi, [esp+12]               ; pSrc
    mov     eax, [esp+16]               ; iSrcStride
    mov     edi, [esp+20]               ; pDst
    mov     ecx, [esp+24]               ; iDstStride
    mov     edx, [esp+28]               ; iHeight = rows remaining
    lea     esi, [esi-2]                ; leftmost tap sits 2 pixels to the left
    WELS_Zero mm7                       ; zero register used to widen bytes
    movq    mm6, [h264_w0x10]           ; rounding term

.next_row:
    ; Fetch the six tap positions and widen each to four words.
    movd    mm0, [esi]
    movd    mm2, [esi+1]
    movd    mm4, [esi+2]
    movd    mm5, [esi+3]
    movd    mm3, [esi+4]
    movd    mm1, [esi+5]
    punpcklbw mm0, mm7
    punpcklbw mm2, mm7
    punpcklbw mm4, mm7
    punpcklbw mm5, mm7
    punpcklbw mm3, mm7
    punpcklbw mm1, mm7

    paddw   mm2, mm3                    ; p1 + p4
    paddw   mm4, mm5                    ; p2 + p3
    psllw   mm4, 2
    psubw   mm4, mm2                    ; t = 4*(p2+p3) - (p1+p4)
    paddw   mm0, mm1                    ; p0 + p5
    paddw   mm0, mm4                    ; + t
    psllw   mm4, 2
    paddw   mm0, mm4                    ; + 4t  => p0+p5 + 5t
    paddw   mm0, mm6
    psraw   mm0, 5
    packuswb mm0, mm7                   ; saturate to unsigned bytes
    movd    [edi], mm0

    lea     esi, [esi+eax]              ; next source row
    lea     edi, [edi+ecx]              ; next destination row
    dec     edx
    jnz     .next_row

    WELSEMMS
    pop     edi
    pop     esi
    ret
|
|
||||||
|
|
||||||
;*******************************************************************************
|
|
||||||
; Macros and other preprocessor constants
|
|
||||||
;*******************************************************************************
|
|
||||||
|
|
||||||
|
|
||||||
; SSE_LOAD_8P dst, zero, mem
;   %1 = destination xmm register (receives eight 16-bit words)
;   %2 = xmm register the loaded bytes are interleaved with; the call sites
;        pass a register that currently holds zero, giving zero-extension
;   %3 = 8-byte memory operand
%macro SSE_LOAD_8P 3
    movq        %1, %3                  ; load 8 bytes into the low half
    punpcklbw   %1, %2                  ; interleave with %2 -> 8 words
%endmacro
|
|
||||||
|
|
||||||
; FILTER_HV_W8 a, b, c, d, e, f, tmp0, tmp1, dst
;   %1..%6 = six consecutive taps, each an xmm register of eight words
;   %7, %8 = scratch xmm registers
;   %9     = 8-byte memory destination
; Stores clip8((a + f - 5*(b + e) + 20*(c + d) + 16) >> 5) for 8 pixels.
; On exit %1 holds the packed result, %7 holds b + e and %8 is zero, so
; callers can reuse %8 as the zero register for the next SSE_LOAD_8P.
; All eight register arguments must be distinct.
%macro FILTER_HV_W8 9
    movdqa      %7, %2
    paddw       %7, %5                  ; b + e
    movdqa      %8, %3
    paddw       %8, %4                  ; c + d
    psllw       %8, 2
    psubw       %8, %7                  ; t = 4*(c+d) - (b+e)
    paddw       %1, %6                  ; a + f
    paddw       %1, [h264_w0x10_1]      ; + 16 (rounding)
    paddw       %1, %8                  ; + t
    psllw       %8, 2
    paddw       %1, %8                  ; + 4t
    psraw       %1, 5
    WELS_Zero   %8
    packuswb    %1, %8                  ; saturate to unsigned bytes
    movq        %9, %1
%endmacro
|
|
||||||
|
|
||||||
;*******************************************************************************
|
|
||||||
; Local Data (Read Only)
|
|
||||||
;*******************************************************************************
|
|
||||||
|
|
||||||
SECTION .rodata align=16
|
|
||||||
|
|
||||||
;*******************************************************************************
|
|
||||||
; Various memory constants (trigonometric values or rounding values)
|
|
||||||
;*******************************************************************************
|
|
||||||
|
|
||||||
ALIGN 16
; Eight words of 16: rounding term added before the ">> 5" of the SSE2
; 6-tap filters.
h264_w0x10_1:
    times 8 dw 16
ALIGN 16
; Eight words of 32: rounding term added before the final ">> 6" of
; FILTER_VER.
h264_mc_hc_32:
    times 8 dw 32
|
|
||||||
;*******************************************************************************
|
|
||||||
; Code
|
|
||||||
;*******************************************************************************
|
|
||||||
|
|
||||||
SECTION .text
|
|
||||||
WELS_EXTERN McHorVer22Width8HorFirst_sse2
|
|
||||||
WELS_EXTERN McHorVer22VerLast_sse2
|
|
||||||
WELS_EXTERN McHorVer02WidthEq8_sse2
|
|
||||||
WELS_EXTERN McHorVer20WidthEq8_sse2
|
|
||||||
WELS_EXTERN McHorVer20WidthEq16_sse2
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
;***********************************************************************
; void_t McHorVer22Width8HorFirst_sse2(uint8_t *pSrc,
;                       int32_t iSrcStride,
;                       uint8_t *pDst,
;                       int32_t iDstStride,
;                       int32_t iHeight
;                       )
;
; Horizontal half of the two-pass filter (32-bit cdecl, SSE2).
; Source rows are read as bytes; for every row eight unrounded,
; unshifted 16-bit sums
;   p[0] + p[5] - 5*(p[1] + p[4]) + 20*(p[2] + p[3])
; are written to pDst with an aligned 16-byte store, so pDst and
; iDstStride must keep every row 16-byte aligned.
; Processing starts two rows above pSrc; the vertical 6-tap pass needs
; 5 rows more than the final block height, so iHeight is expected to
; already include them.  iHeight must be at least 1.
; Clobbers eax, edx, xmm0-xmm5, xmm7, flags; esi/edi/ebx are preserved.
;***********************************************************************
McHorVer22Width8HorFirst_sse2:
    push    esi
    push    edi
    push    ebx

    mov     esi, [esp+16]               ; pSrc
    mov     eax, [esp+20]               ; iSrcStride
    mov     edi, [esp+24]               ; pDst
    mov     edx, [esp+28]               ; iDstStride
    mov     ebx, [esp+32]               ; iHeight = rows remaining
    pxor    xmm7, xmm7                  ; zero register used to widen bytes

    ; Step back two rows so the vertical pass has its upper taps.
    sub     esi, eax
    sub     esi, eax

.next_row:
    ; Fetch the six tap positions and widen each to eight words.
    movq    xmm0, [esi]
    movq    xmm2, [esi+1]
    movq    xmm4, [esi+2]
    movq    xmm5, [esi+3]
    movq    xmm3, [esi+4]
    movq    xmm1, [esi+5]
    punpcklbw xmm0, xmm7
    punpcklbw xmm2, xmm7
    punpcklbw xmm4, xmm7
    punpcklbw xmm5, xmm7
    punpcklbw xmm3, xmm7
    punpcklbw xmm1, xmm7

    paddw   xmm2, xmm3                  ; p1 + p4
    paddw   xmm4, xmm5                  ; p2 + p3
    psllw   xmm4, 2
    psubw   xmm4, xmm2                  ; t = 4*(p2+p3) - (p1+p4)
    paddw   xmm0, xmm1                  ; p0 + p5
    paddw   xmm0, xmm4                  ; + t
    psllw   xmm4, 2
    paddw   xmm0, xmm4                  ; + 4t
    movdqa  [edi], xmm0                 ; store 8 intermediate words

    lea     esi, [esi+eax]              ; next source row
    lea     edi, [edi+edx]              ; next destination row
    dec     ebx
    jnz     .next_row

    pop     ebx
    pop     edi
    pop     esi
    ret
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
;***********************************************************************
; FILTER_VER a, b, c, d, e, f, tmp0, tmp1, dst
;   %1..%6 = six vertically consecutive rows of 16-bit intermediates
;   %7, %8 = scratch xmm registers
;   %9     = 8-byte memory destination
; Computes, per word lane:
;   t   = ((a + f) - (b + e)) >> 2
;   t   = (t + (c + d) - (b + e)) >> 2
;   out = clip8(((c + d) + t + 32) >> 6)
; and stores eight output bytes.  %1, %7 and %8 are destroyed; %2..%6 are
; left untouched so they can serve as the next row's taps.
;***********************************************************************
%macro FILTER_VER 9
    paddw       %1, %6                  ; a + f
    movdqa      %7, %2
    movdqa      %8, %3

    paddw       %7, %5                  ; b + e
    paddw       %8, %4                  ; c + d

    psubw       %1, %7
    psraw       %1, 2
    paddw       %1, %8
    psubw       %1, %7
    psraw       %1, 2
    paddw       %8, %1
    paddw       %8, [h264_mc_hc_32]     ; + 32 (rounding)
    psraw       %8, 6
    packuswb    %8, %8                  ; saturate to unsigned bytes
    movq        %9, %8
%endmacro

;***********************************************************************
;void_t McHorVer22VerLast_sse2(
;                       uint8_t *pSrc,
;                       int32_t pSrcStride,
;                       uint8_t * pDst,
;                       int32_t iDstStride,
;                       int32_t iWidth,
;                       int32_t iHeight);
;
; Vertical half of the two-pass filter (32-bit cdecl, SSE2).
; pSrc points at rows of 16-bit intermediates and is read with aligned
; 16-byte loads, so pSrc and pSrcStride must keep every row 16-byte
; aligned.  The block is processed in columns of 8 pixels (iWidth / 8
; columns); per column the source advances 16 bytes and the destination
; 8 bytes.  Each output row consumes 6 consecutive source rows.
;
; Returns immediately when iHeight <= 0 or iWidth < 8.
; The pSrc and pDst argument slots on the stack are used as the running
; column pointers and are modified (they belong to the callee in cdecl).
; Clobbers eax, ecx, edx, xmm0-xmm7, flags; esi/edi/ebx are preserved.
;***********************************************************************
McHorVer22VerLast_sse2:
    push    esi
    push    edi
    push    ebx

    mov     esi, [esp+16]               ; pSrc
    mov     eax, [esp+20]               ; pSrcStride
    mov     edi, [esp+24]               ; pDst
    mov     edx, [esp+28]               ; iDstStride
    mov     ebx, [esp+32]               ; iWidth
    mov     ecx, [esp+36]               ; iHeight = rows remaining
    test    ecx, ecx
    jle     near .exit                  ; nothing to do for iHeight <= 0
    shr     ebx, 3                      ; ebx = number of 8-pixel columns
    jz      near .exit                  ; iWidth < 8: no full column

.width_loop:
    ; Prime the pipeline with the first six rows of this column.
    movdqa  xmm0, [esi]
    movdqa  xmm1, [esi+eax]
    lea     esi, [esi+2*eax]
    movdqa  xmm2, [esi]
    movdqa  xmm3, [esi+eax]
    lea     esi, [esi+2*eax]
    movdqa  xmm4, [esi]
    movdqa  xmm5, [esi+eax]

    FILTER_VER xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, [edi]
    dec     ecx
    jz      near .x_loop_dec            ; single-row block: column finished
    lea     esi, [esi+2*eax]
    movdqa  xmm6, [esi]

    ; Shift the row window down by one so .start sees taps in xmm0..xmm5.
    movdqa  xmm0, xmm1
    movdqa  xmm1, xmm2
    movdqa  xmm2, xmm3
    movdqa  xmm3, xmm4
    movdqa  xmm4, xmm5
    movdqa  xmm5, xmm6

    add     edi, edx
    sub     esi, eax                    ; esi = row held in xmm4

    ; Eight-way unrolled row loop: each step rotates the register roles by
    ; one instead of moving data, and loads exactly one new row.
.start:
    FILTER_VER xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, [edi]
    dec     ecx
    jz      near .x_loop_dec

    lea     esi, [esi+2*eax]
    movdqa  xmm6, [esi]
    FILTER_VER xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [edi+edx]
    dec     ecx
    jz      near .x_loop_dec

    lea     edi, [edi+2*edx]
    movdqa  xmm7, [esi+eax]
    FILTER_VER xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, xmm1, [edi]
    dec     ecx
    jz      near .x_loop_dec

    lea     esi, [esi+2*eax]
    movdqa  xmm0, [esi]
    FILTER_VER xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, xmm1, xmm2, [edi+edx]
    dec     ecx
    jz      near .x_loop_dec

    lea     edi, [edi+2*edx]
    movdqa  xmm1, [esi+eax]
    FILTER_VER xmm4, xmm5, xmm6, xmm7, xmm0, xmm1, xmm2, xmm3, [edi]
    dec     ecx
    jz      near .x_loop_dec

    lea     esi, [esi+2*eax]
    movdqa  xmm2, [esi]
    FILTER_VER xmm5, xmm6, xmm7, xmm0, xmm1, xmm2, xmm3, xmm4, [edi+edx]
    dec     ecx
    jz      near .x_loop_dec

    lea     edi, [edi+2*edx]
    movdqa  xmm3, [esi+eax]
    FILTER_VER xmm6, xmm7, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, [edi]
    dec     ecx
    jz      near .x_loop_dec

    lea     esi, [esi+2*eax]
    movdqa  xmm4, [esi]
    FILTER_VER xmm7, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, [edi+edx]
    dec     ecx
    jz      near .x_loop_dec

    lea     edi, [edi+2*edx]
    movdqa  xmm5, [esi+eax]
    jmp     near .start

.x_loop_dec:
    dec     ebx
    jz      near .exit
    ; Advance the saved column pointers in place so that every further
    ; column (not only the second one) starts from the right offset.
    add     dword [esp+16], 16          ; pSrc += 8 words
    add     dword [esp+24], 8           ; pDst += 8 bytes
    mov     esi, [esp+16]
    mov     edi, [esp+24]
    mov     ecx, [esp+36]               ; restart the row counter
    jmp     near .width_loop

.exit:
    pop     ebx
    pop     edi
    pop     esi
    ret
|
|
||||||
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
;*******************************************************************************
; void_t McHorVer20WidthEq8_sse2(  uint8_t *pSrc,
;                                  int iSrcStride,
;                                  uint8_t *pDst,
;                                  int iDstStride,
;                                  int iHeight
;                                );
;
; Horizontal 6-tap filter, 8 output pixels per row (32-bit cdecl, SSE2).
; With p = pSrc - 2, each output byte is
;   clip8((p[0] + p[5] - 5*(p[1] + p[4]) + 20*(p[2] + p[3]) + 16) >> 5)
; iHeight must be at least 1 (the row counter is tested after the first row).
; Clobbers eax, ecx, edx, xmm0-xmm7, flags; esi/edi are preserved.
;*******************************************************************************
McHorVer20WidthEq8_sse2:
    push    esi
    push    edi

    mov     esi, [esp + 12]             ; pSrc
    mov     eax, [esp + 16]             ; iSrcStride
    mov     edi, [esp + 20]             ; pDst
    mov     edx, [esp + 24]             ; iDstStride
    mov     ecx, [esp + 28]             ; iHeight = rows remaining

    sub     esi, 2                      ; leftmost tap sits 2 pixels to the left

    pxor    xmm7, xmm7                  ; zero register used to widen bytes
    movdqa  xmm6, [h264_w0x10_1]        ; rounding term
.row_loop:
    ; Fetch the six tap positions and widen each to eight words.
    movq    xmm0, [esi]
    movq    xmm2, [esi+1]
    movq    xmm4, [esi+2]
    movq    xmm5, [esi+3]
    movq    xmm3, [esi+4]
    movq    xmm1, [esi+5]
    punpcklbw xmm0, xmm7
    punpcklbw xmm2, xmm7
    punpcklbw xmm4, xmm7
    punpcklbw xmm5, xmm7
    punpcklbw xmm3, xmm7
    punpcklbw xmm1, xmm7

    paddw   xmm2, xmm3                  ; p1 + p4
    paddw   xmm4, xmm5                  ; p2 + p3
    psllw   xmm4, 2
    psubw   xmm4, xmm2                  ; t = 4*(p2+p3) - (p1+p4)
    paddw   xmm0, xmm1                  ; p0 + p5
    paddw   xmm0, xmm4                  ; + t
    psllw   xmm4, 2
    paddw   xmm0, xmm4                  ; + 4t
    paddw   xmm0, xmm6
    psraw   xmm0, 5

    packuswb xmm0, xmm7                 ; saturate to unsigned bytes
    movq    [edi], xmm0

    add     edi, edx                    ; next destination row
    add     esi, eax                    ; next source row
    dec     ecx
    jnz     near .row_loop

    pop     edi
    pop     esi
    ret
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
;*******************************************************************************
; void_t McHorVer20WidthEq16_sse2(  uint8_t *pSrc,
;                                   int iSrcStride,
;                                   uint8_t *pDst,
;                                   int iDstStride,
;                                   int iHeight
;                                 );
;
; Horizontal 6-tap filter, 16 output pixels per row (32-bit cdecl, SSE2),
; computed as two independent 8-pixel halves at byte offsets 0 and 8.
; With p = pSrc - 2, each output byte is
;   clip8((p[0] + p[5] - 5*(p[1] + p[4]) + 20*(p[2] + p[3]) + 16) >> 5)
; iHeight must be at least 1 (the row counter is tested after the first row).
; Clobbers eax, ecx, edx, xmm0-xmm7, flags; esi/edi are preserved.
;*******************************************************************************
McHorVer20WidthEq16_sse2:
    push    esi
    push    edi

    mov     esi, [esp + 12]             ; pSrc
    mov     eax, [esp + 16]             ; iSrcStride
    mov     edi, [esp + 20]             ; pDst
    mov     ecx, [esp + 28]             ; iHeight = rows remaining
    mov     edx, [esp + 24]             ; iDstStride

    lea     esi, [esi-2]                ; leftmost tap sits 2 pixels to the left

    pxor    xmm7, xmm7                  ; zero register used to widen bytes
    movdqa  xmm6, [h264_w0x10_1]        ; rounding term
.y_loop:

    ; The same 8-pixel filter body is emitted twice by the preprocessor,
    ; first for byte offset 0 and then for byte offset 8.
%assign MCHV20W16_OFS 0
%rep 2
    movq    xmm0, [esi+MCHV20W16_OFS]
    punpcklbw xmm0, xmm7
    movq    xmm1, [esi+MCHV20W16_OFS+5]
    punpcklbw xmm1, xmm7
    movq    xmm2, [esi+MCHV20W16_OFS+1]
    punpcklbw xmm2, xmm7
    movq    xmm3, [esi+MCHV20W16_OFS+4]
    punpcklbw xmm3, xmm7
    movq    xmm4, [esi+MCHV20W16_OFS+2]
    punpcklbw xmm4, xmm7
    movq    xmm5, [esi+MCHV20W16_OFS+3]
    punpcklbw xmm5, xmm7

    paddw   xmm2, xmm3                  ; p1 + p4
    paddw   xmm4, xmm5                  ; p2 + p3
    psllw   xmm4, 2
    psubw   xmm4, xmm2                  ; t = 4*(p2+p3) - (p1+p4)
    paddw   xmm0, xmm1                  ; p0 + p5
    paddw   xmm0, xmm4                  ; + t
    psllw   xmm4, 2
    paddw   xmm0, xmm4                  ; + 4t
    paddw   xmm0, xmm6
    psraw   xmm0, 5
    packuswb xmm0, xmm7                 ; saturate to unsigned bytes
    movq    [edi+MCHV20W16_OFS], xmm0

%assign MCHV20W16_OFS MCHV20W16_OFS+8
%endrep
%undef MCHV20W16_OFS

    lea     edi, [edi+edx]              ; next destination row
    lea     esi, [esi+eax]              ; next source row
    dec     ecx
    jnz     near .y_loop
    pop     edi
    pop     esi
    ret
|
|
||||||
|
|
||||||
|
|
||||||
;*******************************************************************************
; void_t McHorVer02WidthEq8_sse2( uint8_t *pSrc,
;                                 int iSrcStride,
;                                 uint8_t *pDst,
;                                 int iDstStride,
;                                 int iHeight )
;
; Vertical 6-tap filter, 8 output pixels per row (32-bit cdecl, SSE2).
; Rows pSrc - 2*stride .. pSrc + 3*stride feed the first output row; each
; further output row loads one more source row.  The row loop is unrolled
; eight times and rotates register roles instead of copying data: the
; register zeroed by FILTER_HV_W8 (its 8th argument) serves as the zero
; operand of the next SSE_LOAD_8P.
; iHeight must be at least 1 (the counter is tested after each row).
; Clobbers eax, ecx, edx, xmm0-xmm7, flags; esi/edi are preserved.
;*******************************************************************************
ALIGN 16
McHorVer02WidthEq8_sse2:
    push    esi
    push    edi

    mov     esi, [esp + 12]             ; pSrc
    mov     edx, [esp + 16]             ; iSrcStride
    mov     edi, [esp + 20]             ; pDst
    mov     eax, [esp + 24]             ; iDstStride
    mov     ecx, [esp + 28]             ; iHeight = rows remaining

    ; Start two rows above the block.
    sub     esi, edx
    sub     esi, edx

    WELS_Zero xmm7

    ; Prime the six-row window: xmm0..xmm5 = rows -2..+3.
    SSE_LOAD_8P xmm0, xmm7, [esi]
    SSE_LOAD_8P xmm1, xmm7, [esi+edx]
    lea     esi, [esi+2*edx]
    SSE_LOAD_8P xmm2, xmm7, [esi]
    SSE_LOAD_8P xmm3, xmm7, [esi+edx]
    lea     esi, [esi+2*edx]
    SSE_LOAD_8P xmm4, xmm7, [esi]
    SSE_LOAD_8P xmm5, xmm7, [esi+edx]

.row_loop:
    FILTER_HV_W8 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, [edi]
    dec     ecx
    jz      near .done

    lea     esi, [esi+2*edx]
    SSE_LOAD_8P xmm6, xmm7, [esi]
    FILTER_HV_W8 xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [edi+eax]
    dec     ecx
    jz      near .done

    lea     edi, [edi+2*eax]
    SSE_LOAD_8P xmm7, xmm0, [esi+edx]
    FILTER_HV_W8 xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, xmm1, [edi]
    dec     ecx
    jz      near .done

    lea     esi, [esi+2*edx]
    SSE_LOAD_8P xmm0, xmm1, [esi]
    FILTER_HV_W8 xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, xmm1, xmm2, [edi+eax]
    dec     ecx
    jz      near .done

    lea     edi, [edi+2*eax]
    SSE_LOAD_8P xmm1, xmm2, [esi+edx]
    FILTER_HV_W8 xmm4, xmm5, xmm6, xmm7, xmm0, xmm1, xmm2, xmm3, [edi]
    dec     ecx
    jz      near .done

    lea     esi, [esi+2*edx]
    SSE_LOAD_8P xmm2, xmm3, [esi]
    FILTER_HV_W8 xmm5, xmm6, xmm7, xmm0, xmm1, xmm2, xmm3, xmm4, [edi+eax]
    dec     ecx
    jz      near .done

    lea     edi, [edi+2*eax]
    SSE_LOAD_8P xmm3, xmm4, [esi+edx]
    FILTER_HV_W8 xmm6, xmm7, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, [edi]
    dec     ecx
    jz      near .done

    lea     esi, [esi+2*edx]
    SSE_LOAD_8P xmm4, xmm5, [esi]
    FILTER_HV_W8 xmm7, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, [edi+eax]
    dec     ecx
    jz      near .done

    ; Window is back in xmm0..xmm4; load the sixth row and go round again.
    lea     edi, [edi+2*eax]
    SSE_LOAD_8P xmm5, xmm6, [esi+edx]
    jmp     near .row_loop

.done:
    pop     edi
    pop     esi
    ret
|
|
||||||
|
|
||||||
|
|
||||||
@@ -1,135 +0,0 @@
|
|||||||
;*!
|
|
||||||
;* \copy
|
|
||||||
;* Copyright (c) 2009-2013, Cisco Systems
|
|
||||||
;* All rights reserved.
|
|
||||||
;*
|
|
||||||
;* Redistribution and use in source and binary forms, with or without
|
|
||||||
;* modification, are permitted provided that the following conditions
|
|
||||||
;* are met:
|
|
||||||
;*
|
|
||||||
;* * Redistributions of source code must retain the above copyright
|
|
||||||
;* notice, this list of conditions and the following disclaimer.
|
|
||||||
;*
|
|
||||||
;* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
;* notice, this list of conditions and the following disclaimer in
|
|
||||||
;* the documentation and/or other materials provided with the
|
|
||||||
;* distribution.
|
|
||||||
;*
|
|
||||||
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
||||||
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
|
||||||
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
;* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
;*
|
|
||||||
;*
|
|
||||||
;* memzero.asm
|
|
||||||
;*
|
|
||||||
;* Abstract
|
|
||||||
;*
|
|
||||||
;*
|
|
||||||
;* History
|
|
||||||
;* 9/16/2009 Created
|
|
||||||
;*
|
|
||||||
;*
|
|
||||||
;*************************************************************************/
|
|
||||||
|
|
||||||
BITS 32
|
|
||||||
|
|
||||||
%include "asm_inc.asm"
|
|
||||||
;***********************************************************************
|
|
||||||
; Code
|
|
||||||
;***********************************************************************
|
|
||||||
|
|
||||||
SECTION .text
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
;***********************************************************************
; void __cdecl WelsPrefetchZero_mmx(int8_t const *_A);
;
; Issues a PREFETCHNTA hint for the address passed as the only stack
; argument.  Nothing is read into registers or written to memory.
; Clobbers eax.
;***********************************************************************
WELS_EXTERN WelsPrefetchZero_mmx
WelsPrefetchZero_mmx:
    mov         eax, [esp+4]            ; _A
    prefetchnta [eax]
    ret
|
|
||||||
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
;***********************************************************************
;   void WelsSetMemZeroAligned64_sse2(void *dst, int32_t size)
;
; Zeroes `size` bytes at `dst`, 64 bytes per iteration, using aligned
; 16-byte stores: dst must be 16-byte aligned.  size must be a multiple
; of 64, because the loop only terminates when the negated byte count
; reaches exactly zero.  size == 0 returns without touching memory.
; Clobbers eax, ecx, xmm0, flags.
;***********************************************************************
WELS_EXTERN WelsSetMemZeroAligned64_sse2
WelsSetMemZeroAligned64_sse2:
    mov     eax, [esp + 4]              ; dst
    mov     ecx, [esp + 8]              ; size
    neg     ecx                         ; count up from -size towards 0
    jz      .done                       ; size == 0: nothing to clear

    pxor    xmm0, xmm0
.memzeroa64_sse2_loops:
    movdqa  [eax], xmm0
    movdqa  [eax+16], xmm0
    movdqa  [eax+32], xmm0
    movdqa  [eax+48], xmm0
    add     eax, 0x40

    add     ecx, 0x40
    jnz     near .memzeroa64_sse2_loops

.done:
    ret
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
;***********************************************************************
;   void WelsSetMemZeroSize64_mmx(void *dst, int32_t size)
;
; Zeroes `size` bytes at `dst`, 64 bytes per iteration, using 8-byte MMX
; stores.  size must be a multiple of 64, because the loop only
; terminates when the negated byte count reaches exactly zero.
; size == 0 returns without touching memory or the MMX state.
; Clobbers eax, ecx, mm0, flags; WELSEMMS is invoked before returning
; whenever MMX registers were used.
;***********************************************************************
WELS_EXTERN WelsSetMemZeroSize64_mmx
WelsSetMemZeroSize64_mmx:
    mov     eax, [esp + 4]              ; dst
    mov     ecx, [esp + 8]              ; size
    neg     ecx                         ; count up from -size towards 0
    jz      .done                       ; size == 0: nothing to clear

    pxor    mm0, mm0
.memzero64_mmx_loops:
    movq    [eax], mm0
    movq    [eax+8], mm0
    movq    [eax+16], mm0
    movq    [eax+24], mm0
    movq    [eax+32], mm0
    movq    [eax+40], mm0
    movq    [eax+48], mm0
    movq    [eax+56], mm0
    add     eax, 0x40

    add     ecx, 0x40
    jnz     near .memzero64_mmx_loops

    WELSEMMS
.done:
    ret
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
;***********************************************************************
;   void WelsSetMemZeroSize8_mmx(void *dst, int32_t size)
;
; Zeroes `size` bytes at `dst`, 8 bytes per iteration, using MMX stores.
; size must be a multiple of 8, because the loop only terminates when
; the negated byte count reaches exactly zero.
; size == 0 returns without touching memory or the MMX state.
; Clobbers eax, ecx, mm0, flags; WELSEMMS is invoked before returning
; whenever MMX registers were used.
;***********************************************************************
WELS_EXTERN WelsSetMemZeroSize8_mmx
WelsSetMemZeroSize8_mmx:
    mov     eax, [esp + 4]              ; dst
    mov     ecx, [esp + 8]              ; size
    neg     ecx                         ; count up from -size towards 0
    jz      .done                       ; size == 0: nothing to clear
    pxor    mm0, mm0

.memzero8_mmx_loops:
    movq    [eax], mm0
    add     eax, 0x08

    add     ecx, 0x08
    jnz     near .memzero8_mmx_loops

    WELSEMMS
.done:
    ret
|
|
||||||
|
|
||||||
|
|
||||||
@@ -78,7 +78,7 @@ extern void_t McHorVer02WidthEq8_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_
|
|||||||
int32_t iHeight);
|
int32_t iHeight);
|
||||||
extern void_t McHorVer22Width8HorFirst_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
extern void_t McHorVer22Width8HorFirst_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||||
int32_t iHeight);
|
int32_t iHeight);
|
||||||
extern void_t McHorVer22VerLast_sse2 (uint8_t* pTap, int32_t iTapStride, uint8_t* pDst, int32_t iDstStride,
|
extern void_t McHorVer22Width8VerLastAlign_sse2 (uint8_t* pTap, int32_t iTapStride, uint8_t* pDst, int32_t iDstStride,
|
||||||
int32_t iWidth, int32_t iHeight);
|
int32_t iWidth, int32_t iHeight);
|
||||||
extern void_t PixelAvgWidthEq16_sse2 (uint8_t* pDst, int32_t iDstStride, uint8_t* pSrcA, int32_t iSrcAStride,
|
extern void_t PixelAvgWidthEq16_sse2 (uint8_t* pDst, int32_t iDstStride, uint8_t* pSrcA, int32_t iSrcAStride,
|
||||||
uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight);
|
uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight);
|
||||||
|
|||||||
@@ -362,7 +362,7 @@ static inline void_t McHorVer22WidthEq8_sse2 (uint8_t* pSrc, int32_t iSrcStride,
|
|||||||
int32_t iHeight) {
|
int32_t iHeight) {
|
||||||
ENFORCE_STACK_ALIGN_2D (int16_t, iTap, 21, 8, 16)
|
ENFORCE_STACK_ALIGN_2D (int16_t, iTap, 21, 8, 16)
|
||||||
McHorVer22Width8HorFirst_sse2 (pSrc - 2, iSrcStride, (uint8_t*)iTap, 16, iHeight + 5);
|
McHorVer22Width8HorFirst_sse2 (pSrc - 2, iSrcStride, (uint8_t*)iTap, 16, iHeight + 5);
|
||||||
McHorVer22VerLast_sse2 ((uint8_t*)iTap, 16, pDst, iDstStride, 8, iHeight);
|
McHorVer22Width8VerLastAlign_sse2 ((uint8_t*)iTap, 16, pDst, iDstStride, 8, iHeight);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void_t McHorVer02WidthEq16_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
static inline void_t McHorVer02WidthEq16_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||||
|
|||||||
@@ -28,17 +28,9 @@ DECODER_CPP_SRCS=\
|
|||||||
DECODER_OBJS += $(DECODER_CPP_SRCS:.cpp=.o)
|
DECODER_OBJS += $(DECODER_CPP_SRCS:.cpp=.o)
|
||||||
ifeq ($(USE_ASM), Yes)
|
ifeq ($(USE_ASM), Yes)
|
||||||
DECODER_ASM_SRCS=\
|
DECODER_ASM_SRCS=\
|
||||||
$(DECODER_SRCDIR)/./core/asm/asm_inc.asm\
|
|
||||||
$(DECODER_SRCDIR)/./core/asm/block_add.asm\
|
$(DECODER_SRCDIR)/./core/asm/block_add.asm\
|
||||||
$(DECODER_SRCDIR)/./core/asm/cpuid.asm\
|
|
||||||
$(DECODER_SRCDIR)/./core/asm/dct.asm\
|
$(DECODER_SRCDIR)/./core/asm/dct.asm\
|
||||||
$(DECODER_SRCDIR)/./core/asm/deblock.asm\
|
|
||||||
$(DECODER_SRCDIR)/./core/asm/expand_picture.asm\
|
|
||||||
$(DECODER_SRCDIR)/./core/asm/intra_pred.asm\
|
$(DECODER_SRCDIR)/./core/asm/intra_pred.asm\
|
||||||
$(DECODER_SRCDIR)/./core/asm/mb_copy.asm\
|
|
||||||
$(DECODER_SRCDIR)/./core/asm/mc_chroma.asm\
|
|
||||||
$(DECODER_SRCDIR)/./core/asm/mc_luma.asm\
|
|
||||||
$(DECODER_SRCDIR)/./core/asm/memzero.asm\
|
|
||||||
|
|
||||||
DECODER_OBJS += $(DECODER_ASM_SRCS:.asm=.o)
|
DECODER_OBJS += $(DECODER_ASM_SRCS:.asm=.o)
|
||||||
endif
|
endif
|
||||||
@@ -113,39 +105,15 @@ $(DECODER_SRCDIR)/./plus/src/welsCodecTrace.o: $(DECODER_SRCDIR)/./plus/src/wels
|
|||||||
$(DECODER_SRCDIR)/./plus/src/welsDecoderExt.o: $(DECODER_SRCDIR)/./plus/src/welsDecoderExt.cpp
|
$(DECODER_SRCDIR)/./plus/src/welsDecoderExt.o: $(DECODER_SRCDIR)/./plus/src/welsDecoderExt.cpp
|
||||||
$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(DECODER_CFLAGS) $(DECODER_INCLUDES) -c -o $(DECODER_SRCDIR)/./plus/src/welsDecoderExt.o $(DECODER_SRCDIR)/./plus/src/welsDecoderExt.cpp
|
$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(DECODER_CFLAGS) $(DECODER_INCLUDES) -c -o $(DECODER_SRCDIR)/./plus/src/welsDecoderExt.o $(DECODER_SRCDIR)/./plus/src/welsDecoderExt.cpp
|
||||||
|
|
||||||
$(DECODER_SRCDIR)/./core/asm/asm_inc.o: $(DECODER_SRCDIR)/./core/asm/asm_inc.asm
|
|
||||||
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(DECODER_ASMFLAGS) $(DECODER_ASM_INCLUDES) -o $(DECODER_SRCDIR)/./core/asm/asm_inc.o $(DECODER_SRCDIR)/./core/asm/asm_inc.asm
|
|
||||||
|
|
||||||
$(DECODER_SRCDIR)/./core/asm/block_add.o: $(DECODER_SRCDIR)/./core/asm/block_add.asm
|
$(DECODER_SRCDIR)/./core/asm/block_add.o: $(DECODER_SRCDIR)/./core/asm/block_add.asm
|
||||||
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(DECODER_ASMFLAGS) $(DECODER_ASM_INCLUDES) -o $(DECODER_SRCDIR)/./core/asm/block_add.o $(DECODER_SRCDIR)/./core/asm/block_add.asm
|
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(DECODER_ASMFLAGS) $(DECODER_ASM_INCLUDES) -o $(DECODER_SRCDIR)/./core/asm/block_add.o $(DECODER_SRCDIR)/./core/asm/block_add.asm
|
||||||
|
|
||||||
$(DECODER_SRCDIR)/./core/asm/cpuid.o: $(DECODER_SRCDIR)/./core/asm/cpuid.asm
|
|
||||||
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(DECODER_ASMFLAGS) $(DECODER_ASM_INCLUDES) -o $(DECODER_SRCDIR)/./core/asm/cpuid.o $(DECODER_SRCDIR)/./core/asm/cpuid.asm
|
|
||||||
|
|
||||||
$(DECODER_SRCDIR)/./core/asm/dct.o: $(DECODER_SRCDIR)/./core/asm/dct.asm
|
$(DECODER_SRCDIR)/./core/asm/dct.o: $(DECODER_SRCDIR)/./core/asm/dct.asm
|
||||||
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(DECODER_ASMFLAGS) $(DECODER_ASM_INCLUDES) -o $(DECODER_SRCDIR)/./core/asm/dct.o $(DECODER_SRCDIR)/./core/asm/dct.asm
|
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(DECODER_ASMFLAGS) $(DECODER_ASM_INCLUDES) -o $(DECODER_SRCDIR)/./core/asm/dct.o $(DECODER_SRCDIR)/./core/asm/dct.asm
|
||||||
|
|
||||||
$(DECODER_SRCDIR)/./core/asm/deblock.o: $(DECODER_SRCDIR)/./core/asm/deblock.asm
|
|
||||||
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(DECODER_ASMFLAGS) $(DECODER_ASM_INCLUDES) -o $(DECODER_SRCDIR)/./core/asm/deblock.o $(DECODER_SRCDIR)/./core/asm/deblock.asm
|
|
||||||
|
|
||||||
$(DECODER_SRCDIR)/./core/asm/expand_picture.o: $(DECODER_SRCDIR)/./core/asm/expand_picture.asm
|
|
||||||
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(DECODER_ASMFLAGS) $(DECODER_ASM_INCLUDES) -o $(DECODER_SRCDIR)/./core/asm/expand_picture.o $(DECODER_SRCDIR)/./core/asm/expand_picture.asm
|
|
||||||
|
|
||||||
$(DECODER_SRCDIR)/./core/asm/intra_pred.o: $(DECODER_SRCDIR)/./core/asm/intra_pred.asm
|
$(DECODER_SRCDIR)/./core/asm/intra_pred.o: $(DECODER_SRCDIR)/./core/asm/intra_pred.asm
|
||||||
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(DECODER_ASMFLAGS) $(DECODER_ASM_INCLUDES) -o $(DECODER_SRCDIR)/./core/asm/intra_pred.o $(DECODER_SRCDIR)/./core/asm/intra_pred.asm
|
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(DECODER_ASMFLAGS) $(DECODER_ASM_INCLUDES) -o $(DECODER_SRCDIR)/./core/asm/intra_pred.o $(DECODER_SRCDIR)/./core/asm/intra_pred.asm
|
||||||
|
|
||||||
$(DECODER_SRCDIR)/./core/asm/mb_copy.o: $(DECODER_SRCDIR)/./core/asm/mb_copy.asm
|
|
||||||
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(DECODER_ASMFLAGS) $(DECODER_ASM_INCLUDES) -o $(DECODER_SRCDIR)/./core/asm/mb_copy.o $(DECODER_SRCDIR)/./core/asm/mb_copy.asm
|
|
||||||
|
|
||||||
$(DECODER_SRCDIR)/./core/asm/mc_chroma.o: $(DECODER_SRCDIR)/./core/asm/mc_chroma.asm
|
|
||||||
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(DECODER_ASMFLAGS) $(DECODER_ASM_INCLUDES) -o $(DECODER_SRCDIR)/./core/asm/mc_chroma.o $(DECODER_SRCDIR)/./core/asm/mc_chroma.asm
|
|
||||||
|
|
||||||
$(DECODER_SRCDIR)/./core/asm/mc_luma.o: $(DECODER_SRCDIR)/./core/asm/mc_luma.asm
|
|
||||||
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(DECODER_ASMFLAGS) $(DECODER_ASM_INCLUDES) -o $(DECODER_SRCDIR)/./core/asm/mc_luma.o $(DECODER_SRCDIR)/./core/asm/mc_luma.asm
|
|
||||||
|
|
||||||
$(DECODER_SRCDIR)/./core/asm/memzero.o: $(DECODER_SRCDIR)/./core/asm/memzero.asm
|
|
||||||
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(DECODER_ASMFLAGS) $(DECODER_ASM_INCLUDES) -o $(DECODER_SRCDIR)/./core/asm/memzero.o $(DECODER_SRCDIR)/./core/asm/memzero.asm
|
|
||||||
|
|
||||||
$(LIBPREFIX)decoder.$(LIBSUFFIX): $(DECODER_OBJS)
|
$(LIBPREFIX)decoder.$(LIBSUFFIX): $(DECODER_OBJS)
|
||||||
rm -f $(LIBPREFIX)decoder.$(LIBSUFFIX)
|
rm -f $(LIBPREFIX)decoder.$(LIBSUFFIX)
|
||||||
$(AR) cr $@ $(DECODER_OBJS)
|
$(AR) cr $@ $(DECODER_OBJS)
|
||||||
|
|||||||
@@ -1,235 +0,0 @@
|
|||||||
;*!
|
|
||||||
;* \copy
|
|
||||||
;* Copyright (c) 2009-2013, Cisco Systems
|
|
||||||
;* All rights reserved.
|
|
||||||
;*
|
|
||||||
;* Redistribution and use in source and binary forms, with or without
|
|
||||||
;* modification, are permitted provided that the following conditions
|
|
||||||
;* are met:
|
|
||||||
;*
|
|
||||||
;* * Redistributions of source code must retain the above copyright
|
|
||||||
;* notice, this list of conditions and the following disclaimer.
|
|
||||||
;*
|
|
||||||
;* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
;* notice, this list of conditions and the following disclaimer in
|
|
||||||
;* the documentation and/or other materials provided with the
|
|
||||||
;* distribution.
|
|
||||||
;*
|
|
||||||
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
||||||
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
|
||||||
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
;* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
;*
|
|
||||||
;*
|
|
||||||
;* sse2inc.asm
|
|
||||||
;*
|
|
||||||
;* Abstract
|
|
||||||
;* macro and constant
|
|
||||||
;*
|
|
||||||
;* History
|
|
||||||
;* 8/5/2009 Created
|
|
||||||
;*
|
|
||||||
;*
|
|
||||||
;*************************************************************************/
|
|
||||||
;***********************************************************************
|
|
||||||
; Options, for DEBUG
|
|
||||||
;***********************************************************************
|
|
||||||
|
|
||||||
%if 1
|
|
||||||
%define MOVDQ movdqa
|
|
||||||
%else
|
|
||||||
%define MOVDQ movdqu
|
|
||||||
%endif
|
|
||||||
|
|
||||||
%if 1
|
|
||||||
%define WELSEMMS emms
|
|
||||||
%else
|
|
||||||
%define WELSEMMS
|
|
||||||
%endif
|
|
||||||
|
|
||||||
BITS 32
|
|
||||||
|
|
||||||
;***********************************************************************
|
|
||||||
; Macros
|
|
||||||
;***********************************************************************
|
|
||||||
|
|
||||||
%macro WELS_EXTERN 1
|
|
||||||
%ifdef PREFIX
|
|
||||||
global _%1
|
|
||||||
%define %1 _%1
|
|
||||||
%else
|
|
||||||
global %1
|
|
||||||
%endif
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
%macro WELS_AbsW 2
|
|
||||||
pxor %2, %2
|
|
||||||
psubw %2, %1
|
|
||||||
pmaxsw %1, %2
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
%macro MMX_XSwap 4
|
|
||||||
movq %4, %2
|
|
||||||
punpckh%1 %4, %3
|
|
||||||
punpckl%1 %2, %3
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
; pOut mm1, mm4, mm5, mm3
|
|
||||||
%macro MMX_Trans4x4W 5
|
|
||||||
MMX_XSwap wd, %1, %2, %5
|
|
||||||
MMX_XSwap wd, %3, %4, %2
|
|
||||||
MMX_XSwap dq, %1, %3, %4
|
|
||||||
MMX_XSwap dq, %5, %2, %3
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
;for TRANSPOSE
|
|
||||||
%macro SSE2_XSawp 4
|
|
||||||
movdqa %4, %2
|
|
||||||
punpckl%1 %2, %3
|
|
||||||
punpckh%1 %4, %3
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
; in: xmm1, xmm2, xmm3, xmm4 pOut: xmm1, xmm4, xmm5, mm3
|
|
||||||
%macro SSE2_Trans4x4D 5
|
|
||||||
SSE2_XSawp dq, %1, %2, %5
|
|
||||||
SSE2_XSawp dq, %3, %4, %2
|
|
||||||
SSE2_XSawp qdq, %1, %3, %4
|
|
||||||
SSE2_XSawp qdq, %5, %2, %3
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
;in: xmm0, xmm1, xmm2, xmm3 pOut: xmm0, xmm1, xmm3, xmm4
|
|
||||||
%macro SSE2_TransTwo4x4W 5
|
|
||||||
SSE2_XSawp wd, %1, %2, %5
|
|
||||||
SSE2_XSawp wd, %3, %4, %2
|
|
||||||
SSE2_XSawp dq, %1, %3, %4
|
|
||||||
SSE2_XSawp dq, %5, %2, %3
|
|
||||||
SSE2_XSawp qdq, %1, %5, %2
|
|
||||||
SSE2_XSawp qdq, %4, %3, %5
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
;in: m1, m2, m3, m4, m5, m6, m7, m8
|
|
||||||
;pOut: m5, m3, m4, m8, m6, m2, m7, m1
|
|
||||||
%macro SSE2_TransTwo8x8B 9
|
|
||||||
movdqa %9, %8
|
|
||||||
SSE2_XSawp bw, %1, %2, %8
|
|
||||||
SSE2_XSawp bw, %3, %4, %2
|
|
||||||
SSE2_XSawp bw, %5, %6, %4
|
|
||||||
movdqa %6, %9
|
|
||||||
movdqa %9, %4
|
|
||||||
SSE2_XSawp bw, %7, %6, %4
|
|
||||||
|
|
||||||
SSE2_XSawp wd, %1, %3, %6
|
|
||||||
SSE2_XSawp wd, %8, %2, %3
|
|
||||||
SSE2_XSawp wd, %5, %7, %2
|
|
||||||
movdqa %7, %9
|
|
||||||
movdqa %9, %3
|
|
||||||
SSE2_XSawp wd, %7, %4, %3
|
|
||||||
|
|
||||||
SSE2_XSawp dq, %1, %5, %4
|
|
||||||
SSE2_XSawp dq, %6, %2, %5
|
|
||||||
SSE2_XSawp dq, %8, %7, %2
|
|
||||||
movdqa %7, %9
|
|
||||||
movdqa %9, %5
|
|
||||||
SSE2_XSawp dq, %7, %3, %5
|
|
||||||
|
|
||||||
SSE2_XSawp qdq, %1, %8, %3
|
|
||||||
SSE2_XSawp qdq, %4, %2, %8
|
|
||||||
SSE2_XSawp qdq, %6, %7, %2
|
|
||||||
movdqa %7, %9
|
|
||||||
movdqa %9, %1
|
|
||||||
SSE2_XSawp qdq, %7, %5, %1
|
|
||||||
movdqa %5, %9
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
;xmm0, xmm6, xmm7, [eax], [ecx]
|
|
||||||
;xmm7 = 0, eax = pix1, ecx = pix2, xmm0 save the result
|
|
||||||
%macro SSE2_LoadDiff8P 5
|
|
||||||
movq %1, %4
|
|
||||||
punpcklbw %1, %3
|
|
||||||
movq %2, %5
|
|
||||||
punpcklbw %2, %3
|
|
||||||
psubw %1, %2
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
; m2 = m1 + m2, m1 = m1 - m2
|
|
||||||
%macro SSE2_SumSub 3
|
|
||||||
movdqa %3, %2
|
|
||||||
paddw %2, %1
|
|
||||||
psubw %1, %3
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
|
|
||||||
%macro butterfly_1to16_sse 3 ; xmm? for dst, xmm? for tmp, one byte for pSrc [generic register name: a/b/c/d]
|
|
||||||
mov %3h, %3l
|
|
||||||
movd %1, e%3x ; i.e, 1% = eax (=b0)
|
|
||||||
pshuflw %2, %1, 00h ; ..., b0 b0 b0 b0 b0 b0 b0 b0
|
|
||||||
pshufd %1, %2, 00h ; b0 b0 b0 b0, b0 b0 b0 b0, b0 b0 b0 b0, b0 b0 b0 b0
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
;copy a dw into a xmm for 8 times
|
|
||||||
%macro SSE2_Copy8Times 2
|
|
||||||
movd %1, %2
|
|
||||||
punpcklwd %1, %1
|
|
||||||
pshufd %1, %1, 0
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
;copy a db into a xmm for 16 times
|
|
||||||
%macro SSE2_Copy16Times 2
|
|
||||||
movd %1, %2
|
|
||||||
pshuflw %1, %1, 0
|
|
||||||
punpcklqdq %1, %1
|
|
||||||
packuswb %1, %1
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
;***********************************************************************
|
|
||||||
;preprocessor constants
|
|
||||||
;***********************************************************************
|
|
||||||
;dw 32,32,32,32,32,32,32,32 for xmm
|
|
||||||
;dw 32,32,32,32 for mm
|
|
||||||
%macro WELS_DW32 1
|
|
||||||
pcmpeqw %1,%1
|
|
||||||
psrlw %1,15
|
|
||||||
psllw %1,5
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
;dw 1, 1, 1, 1, 1, 1, 1, 1 for xmm
|
|
||||||
;dw 1, 1, 1, 1 for mm
|
|
||||||
%macro WELS_DW1 1
|
|
||||||
pcmpeqw %1,%1
|
|
||||||
psrlw %1,15
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
;all 0 for xmm and mm
|
|
||||||
%macro WELS_Zero 1
|
|
||||||
pxor %1, %1
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
;dd 1, 1, 1, 1 for xmm
|
|
||||||
;dd 1, 1 for mm
|
|
||||||
%macro WELS_DD1 1
|
|
||||||
pcmpeqw %1,%1
|
|
||||||
psrld %1,31
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
;dB 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
|
|
||||||
%macro WELS_DB1 1
|
|
||||||
pcmpeqw %1,%1
|
|
||||||
psrlw %1,15
|
|
||||||
packuswb %1,%1
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -44,7 +44,7 @@
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
%ifdef X86_32
|
||||||
SECTION .rodata align=16
|
SECTION .rodata align=16
|
||||||
|
|
||||||
align 16
|
align 16
|
||||||
@@ -457,3 +457,4 @@ CavlcParamCal_sse2:
|
|||||||
pop edi
|
pop edi
|
||||||
pop ebx
|
pop ebx
|
||||||
ret
|
ret
|
||||||
|
%endif
|
||||||
|
|||||||
@@ -42,8 +42,6 @@
|
|||||||
|
|
||||||
%include "asm_inc.asm"
|
%include "asm_inc.asm"
|
||||||
|
|
||||||
BITS 32
|
|
||||||
|
|
||||||
SECTION .rodata align=16
|
SECTION .rodata align=16
|
||||||
|
|
||||||
;***********************************************************************
|
;***********************************************************************
|
||||||
@@ -131,22 +129,27 @@ SSE2_DeQuant8 dw 10, 13, 10, 13, 13, 16, 13, 16,
|
|||||||
packuswb %1, %2
|
packuswb %1, %2
|
||||||
movd %5, %1
|
movd %5, %1
|
||||||
%endmacro
|
%endmacro
|
||||||
|
SECTION .text
|
||||||
ALIGN 16
|
ALIGN 16
|
||||||
;***********************************************************************
|
;***********************************************************************
|
||||||
; void __cdecl WelsDctT4_mmx( int16_t *pDct[4], uint8_t *pix1, int32_t i_pix1, uint8_t *pix2, int32_t i_pix2 )
|
; void __cdecl WelsDctT4_mmx( int16_t *pDct[4], uint8_t *pix1, int32_t i_pix1, uint8_t *pix2, int32_t i_pix2 )
|
||||||
;***********************************************************************
|
;***********************************************************************
|
||||||
WELS_EXTERN WelsDctT4_mmx
|
WELS_EXTERN WelsDctT4_mmx
|
||||||
WelsDctT4_mmx:
|
WelsDctT4_mmx:
|
||||||
push ebx
|
;push ebx
|
||||||
mov eax, [esp+12] ; pix1
|
;mov eax, [esp+12] ; pix1
|
||||||
mov ebx, [esp+16] ; i_pix1
|
;mov ebx, [esp+16] ; i_pix1
|
||||||
mov ecx, [esp+20] ; pix2
|
;mov ecx, [esp+20] ; pix2
|
||||||
mov edx, [esp+24] ; i_pix2
|
;mov edx, [esp+24] ; i_pix2
|
||||||
|
%assign push_num 0
|
||||||
|
LOAD_5_PARA
|
||||||
|
%ifndef X86_32
|
||||||
|
movsx r2, r2d
|
||||||
|
movsx r4, r4d
|
||||||
|
%endif
|
||||||
WELS_Zero mm7
|
WELS_Zero mm7
|
||||||
|
|
||||||
MMX_LoadDiff4x4P mm1, mm2, mm3, mm4, eax, ebx, ecx, edx, mm0, mm7
|
MMX_LoadDiff4x4P mm1, mm2, mm3, mm4, r1, r2, r3, r4, mm0, mm7
|
||||||
|
|
||||||
MMX_DCT mm1, mm2, mm3 ,mm4, mm5, mm6
|
MMX_DCT mm1, mm2, mm3 ,mm4, mm5, mm6
|
||||||
MMX_Trans4x4W mm3, mm1, mm4, mm5, mm2
|
MMX_Trans4x4W mm3, mm1, mm4, mm5, mm2
|
||||||
@@ -154,14 +157,14 @@ WelsDctT4_mmx:
|
|||||||
MMX_DCT mm3, mm5, mm2 ,mm4, mm1, mm6
|
MMX_DCT mm3, mm5, mm2 ,mm4, mm1, mm6
|
||||||
MMX_Trans4x4W mm2, mm3, mm4, mm1, mm5
|
MMX_Trans4x4W mm2, mm3, mm4, mm1, mm5
|
||||||
|
|
||||||
mov eax, [esp+ 8] ; pDct
|
;mov eax, [esp+ 8] ; pDct
|
||||||
movq [eax+ 0], mm2
|
movq [r0+ 0], mm2
|
||||||
movq [eax+ 8], mm1
|
movq [r0+ 8], mm1
|
||||||
movq [eax+16], mm5
|
movq [r0+16], mm5
|
||||||
movq [eax+24], mm4
|
movq [r0+24], mm4
|
||||||
|
|
||||||
WELSEMMS
|
WELSEMMS
|
||||||
pop ebx
|
LOAD_5_PARA_POP
|
||||||
|
;pop ebx
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
|
||||||
@@ -170,23 +173,28 @@ WelsDctT4_mmx:
|
|||||||
;***********************************************************************
|
;***********************************************************************
|
||||||
WELS_EXTERN WelsIDctT4Rec_mmx
|
WELS_EXTERN WelsIDctT4Rec_mmx
|
||||||
WelsIDctT4Rec_mmx:
|
WelsIDctT4Rec_mmx:
|
||||||
push ebx
|
;push ebx
|
||||||
%define pushsize 4
|
;%define pushsize 4
|
||||||
%define p_dst esp+pushsize+4
|
;%define p_dst esp+pushsize+4
|
||||||
%define i_dst esp+pushsize+8
|
;%define i_dst esp+pushsize+8
|
||||||
%define p_pred esp+pushsize+12
|
;%define p_pred esp+pushsize+12
|
||||||
%define i_pred esp+pushsize+16
|
;%define i_pred esp+pushsize+16
|
||||||
%define pDct esp+pushsize+20
|
;%define pDct esp+pushsize+20
|
||||||
|
%assign push_num 0
|
||||||
mov eax, [pDct ]
|
LOAD_5_PARA
|
||||||
movq mm0, [eax+ 0]
|
%ifndef X86_32
|
||||||
movq mm1, [eax+ 8]
|
movsx r1, r1d
|
||||||
movq mm2, [eax+16]
|
movsx r3, r3d
|
||||||
movq mm3, [eax+24]
|
%endif
|
||||||
mov edx, [p_dst ]
|
; mov eax, [pDct ]
|
||||||
mov ecx, [i_dst ]
|
movq mm0, [r4+ 0]
|
||||||
mov eax, [p_pred]
|
movq mm1, [r4+ 8]
|
||||||
mov ebx, [i_pred]
|
movq mm2, [r4+16]
|
||||||
|
movq mm3, [r4+24]
|
||||||
|
;mov edx, [p_dst ] ; r0
|
||||||
|
;mov ecx, [i_dst ] ; r1
|
||||||
|
;mov eax, [p_pred] ; r2
|
||||||
|
;mov ebx, [i_pred] ; r3
|
||||||
|
|
||||||
MMX_Trans4x4W mm0, mm1, mm2, mm3, mm4
|
MMX_Trans4x4W mm0, mm1, mm2, mm3, mm4
|
||||||
MMX_IDCT mm1, mm2, mm3, mm4, mm0, mm6
|
MMX_IDCT mm1, mm2, mm3, mm4, mm0, mm6
|
||||||
@@ -196,21 +204,22 @@ WelsIDctT4Rec_mmx:
|
|||||||
WELS_Zero mm7
|
WELS_Zero mm7
|
||||||
WELS_DW32 mm6
|
WELS_DW32 mm6
|
||||||
|
|
||||||
MMX_StoreDiff4P mm3, mm0, mm6, mm7, [edx], [eax]
|
MMX_StoreDiff4P mm3, mm0, mm6, mm7, [r0], [r2]
|
||||||
MMX_StoreDiff4P mm4, mm0, mm6, mm7, [edx+ecx], [eax+ebx]
|
MMX_StoreDiff4P mm4, mm0, mm6, mm7, [r0+r1], [r2+r3]
|
||||||
lea edx, [edx+2*ecx]
|
lea r0, [r0+2*r1]
|
||||||
lea eax, [eax+2*ebx]
|
lea r2, [r2+2*r3]
|
||||||
MMX_StoreDiff4P mm1, mm0, mm6, mm7, [edx], [eax]
|
MMX_StoreDiff4P mm1, mm0, mm6, mm7, [r0], [r2]
|
||||||
MMX_StoreDiff4P mm2, mm0, mm6, mm7, [edx+ecx], [eax+ebx]
|
MMX_StoreDiff4P mm2, mm0, mm6, mm7, [r0+r1], [r2+r3]
|
||||||
|
|
||||||
WELSEMMS
|
WELSEMMS
|
||||||
%undef pushsize
|
LOAD_5_PARA_POP
|
||||||
%undef p_dst
|
;%undef pushsize
|
||||||
%undef i_dst
|
;%undef p_dst
|
||||||
%undef p_pred
|
;%undef i_dst
|
||||||
%undef i_pred
|
;%undef p_pred
|
||||||
%undef pDct
|
;%undef i_pred
|
||||||
pop ebx
|
;%undef pDct
|
||||||
|
; pop ebx
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
|
||||||
@@ -314,78 +323,88 @@ WelsIDctT4Rec_mmx:
|
|||||||
WELS_EXTERN WelsDctFourT4_sse2
|
WELS_EXTERN WelsDctFourT4_sse2
|
||||||
ALIGN 16
|
ALIGN 16
|
||||||
WelsDctFourT4_sse2:
|
WelsDctFourT4_sse2:
|
||||||
push ebx
|
;push ebx
|
||||||
push esi
|
;push esi
|
||||||
mov esi, [esp+12]
|
;mov esi, [esp+12]
|
||||||
mov eax, [esp+16] ; pix1
|
;mov eax, [esp+16] ; pix1
|
||||||
mov ebx, [esp+20] ; i_pix1
|
;mov ebx, [esp+20] ; i_pix1
|
||||||
mov ecx, [esp+24] ; pix2
|
;mov ecx, [esp+24] ; pix2
|
||||||
mov edx, [esp+28] ; i_pix2
|
;mov edx, [esp+28] ; i_pix2
|
||||||
|
%assign push_num 0
|
||||||
|
LOAD_5_PARA
|
||||||
|
%ifndef X86_32
|
||||||
|
movsx r2, r2d
|
||||||
|
movsx r4, r4d
|
||||||
|
%endif
|
||||||
pxor xmm7, xmm7
|
pxor xmm7, xmm7
|
||||||
|
|
||||||
;Load 4x8
|
;Load 4x8
|
||||||
SSE2_LoadDiff8P xmm0, xmm6, xmm7, [eax ], [ecx]
|
SSE2_LoadDiff8P xmm0, xmm6, xmm7, [r1], [r3]
|
||||||
SSE2_LoadDiff8P xmm1, xmm6, xmm7, [eax+ebx ], [ecx+edx]
|
SSE2_LoadDiff8P xmm1, xmm6, xmm7, [r1+r2], [r3+r4]
|
||||||
lea eax, [eax + 2 * ebx]
|
lea r1, [r1 + 2 * r2]
|
||||||
lea ecx, [ecx + 2 * edx]
|
lea r3, [r3 + 2 * r4]
|
||||||
SSE2_LoadDiff8P xmm2, xmm6, xmm7, [eax], [ecx]
|
SSE2_LoadDiff8P xmm2, xmm6, xmm7, [r1], [r3]
|
||||||
SSE2_LoadDiff8P xmm3, xmm6, xmm7, [eax+ebx], [ecx+edx]
|
SSE2_LoadDiff8P xmm3, xmm6, xmm7, [r1+r2], [r3+r4]
|
||||||
|
|
||||||
SSE2_DCT xmm1, xmm2, xmm3, xmm4, xmm5, xmm0
|
SSE2_DCT xmm1, xmm2, xmm3, xmm4, xmm5, xmm0
|
||||||
SSE2_TransTwo4x4W xmm2, xmm0, xmm3, xmm4, xmm1
|
SSE2_TransTwo4x4W xmm2, xmm0, xmm3, xmm4, xmm1
|
||||||
SSE2_DCT xmm0, xmm4, xmm1, xmm3, xmm5, xmm2
|
SSE2_DCT xmm0, xmm4, xmm1, xmm3, xmm5, xmm2
|
||||||
SSE2_TransTwo4x4W xmm4, xmm2, xmm1, xmm3, xmm0
|
SSE2_TransTwo4x4W xmm4, xmm2, xmm1, xmm3, xmm0
|
||||||
|
|
||||||
SSE2_Store4x8p esi, xmm4, xmm2, xmm3, xmm0, xmm5
|
SSE2_Store4x8p r0, xmm4, xmm2, xmm3, xmm0, xmm5
|
||||||
|
|
||||||
lea eax, [eax + 2 * ebx]
|
lea r1, [r1 + 2 * r2]
|
||||||
lea ecx, [ecx + 2 * edx]
|
lea r3, [r3 + 2 * r4]
|
||||||
|
|
||||||
;Load 4x8
|
;Load 4x8
|
||||||
SSE2_LoadDiff8P xmm0, xmm6, xmm7, [eax ], [ecx ]
|
SSE2_LoadDiff8P xmm0, xmm6, xmm7, [r1 ], [r3 ]
|
||||||
SSE2_LoadDiff8P xmm1, xmm6, xmm7, [eax+ebx ], [ecx+edx]
|
SSE2_LoadDiff8P xmm1, xmm6, xmm7, [r1+r2 ], [r3+r4]
|
||||||
lea eax, [eax + 2 * ebx]
|
lea r1, [r1 + 2 * r2]
|
||||||
lea ecx, [ecx + 2 * edx]
|
lea r3, [r3 + 2 * r4]
|
||||||
SSE2_LoadDiff8P xmm2, xmm6, xmm7, [eax], [ecx]
|
SSE2_LoadDiff8P xmm2, xmm6, xmm7, [r1], [r3]
|
||||||
SSE2_LoadDiff8P xmm3, xmm6, xmm7, [eax+ebx], [ecx+edx]
|
SSE2_LoadDiff8P xmm3, xmm6, xmm7, [r1+r2], [r3+r4]
|
||||||
|
|
||||||
SSE2_DCT xmm1, xmm2, xmm3, xmm4, xmm5, xmm0
|
SSE2_DCT xmm1, xmm2, xmm3, xmm4, xmm5, xmm0
|
||||||
SSE2_TransTwo4x4W xmm2, xmm0, xmm3, xmm4, xmm1
|
SSE2_TransTwo4x4W xmm2, xmm0, xmm3, xmm4, xmm1
|
||||||
SSE2_DCT xmm0, xmm4, xmm1, xmm3, xmm5, xmm2
|
SSE2_DCT xmm0, xmm4, xmm1, xmm3, xmm5, xmm2
|
||||||
SSE2_TransTwo4x4W xmm4, xmm2, xmm1, xmm3, xmm0
|
SSE2_TransTwo4x4W xmm4, xmm2, xmm1, xmm3, xmm0
|
||||||
|
|
||||||
lea esi, [esi+64]
|
lea r0, [r0+64]
|
||||||
SSE2_Store4x8p esi, xmm4, xmm2, xmm3, xmm0, xmm5
|
SSE2_Store4x8p r0, xmm4, xmm2, xmm3, xmm0, xmm5
|
||||||
|
|
||||||
pop esi
|
;pop esi
|
||||||
pop ebx
|
;pop ebx
|
||||||
|
LOAD_5_PARA_POP
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
|
||||||
%define rec esp + pushsize + 4
|
;%define rec esp + pushsize + 4
|
||||||
%define stride esp + pushsize + 8
|
;%define stride esp + pushsize + 8
|
||||||
%define pred esp + pushsize + 12
|
;%define pred esp + pushsize + 12
|
||||||
%define pred_stride esp + pushsize + 16
|
;%define pred_stride esp + pushsize + 16
|
||||||
%define rs esp + pushsize + 20
|
;%define rs esp + pushsize + 20
|
||||||
;***********************************************************************
|
;***********************************************************************
|
||||||
; void WelsIDctFourT4Rec_sse2(uint8_t *rec, int32_t stride, uint8_t *pred, int32_t pred_stride, int16_t *rs);
|
; void WelsIDctFourT4Rec_sse2(uint8_t *rec, int32_t stride, uint8_t *pred, int32_t pred_stride, int16_t *rs);
|
||||||
;***********************************************************************
|
;***********************************************************************
|
||||||
WELS_EXTERN WelsIDctFourT4Rec_sse2
|
WELS_EXTERN WelsIDctFourT4Rec_sse2
|
||||||
ALIGN 16
|
ALIGN 16
|
||||||
WelsIDctFourT4Rec_sse2:
|
WelsIDctFourT4Rec_sse2:
|
||||||
%define pushsize 8
|
;%define pushsize 8
|
||||||
push ebx
|
; push ebx
|
||||||
push esi
|
; push esi
|
||||||
|
|
||||||
mov eax, [rec]
|
|
||||||
mov ebx, [stride]
|
|
||||||
mov ecx, [pred]
|
|
||||||
mov edx, [pred_stride]
|
|
||||||
mov esi, [rs]
|
|
||||||
|
|
||||||
|
; mov eax, [rec]
|
||||||
|
; mov ebx, [stride]
|
||||||
|
; mov ecx, [pred]
|
||||||
|
; mov edx, [pred_stride]
|
||||||
|
; mov esi, [rs]
|
||||||
|
%assign push_num 0
|
||||||
|
LOAD_5_PARA
|
||||||
|
%ifndef X86_32
|
||||||
|
movsx r1, r1d
|
||||||
|
movsx r3, r3d
|
||||||
|
%endif
|
||||||
;Load 4x8
|
;Load 4x8
|
||||||
SSE2_Load4x8p esi, xmm0, xmm1, xmm4, xmm2, xmm5
|
SSE2_Load4x8p r4, xmm0, xmm1, xmm4, xmm2, xmm5
|
||||||
|
|
||||||
SSE2_TransTwo4x4W xmm0, xmm1, xmm4, xmm2, xmm3
|
SSE2_TransTwo4x4W xmm0, xmm1, xmm4, xmm2, xmm3
|
||||||
SSE2_IDCT xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm0
|
SSE2_IDCT xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm0
|
||||||
@@ -395,17 +414,17 @@ WelsIDctFourT4Rec_sse2:
|
|||||||
WELS_Zero xmm7
|
WELS_Zero xmm7
|
||||||
WELS_DW32 xmm6
|
WELS_DW32 xmm6
|
||||||
|
|
||||||
SSE2_StoreDiff8p xmm4, xmm5, xmm6, xmm7, [eax ], [ecx]
|
SSE2_StoreDiff8p xmm4, xmm5, xmm6, xmm7, [r0 ], [r2]
|
||||||
SSE2_StoreDiff8p xmm0, xmm5, xmm6, xmm7, [eax + ebx ], [ecx + edx]
|
SSE2_StoreDiff8p xmm0, xmm5, xmm6, xmm7, [r0 + r1 ], [r2 + r3]
|
||||||
lea eax, [eax + 2 * ebx]
|
lea r0, [r0 + 2 * r1]
|
||||||
lea ecx, [ecx + 2 * edx]
|
lea r2, [r2 + 2 * r3]
|
||||||
SSE2_StoreDiff8p xmm1, xmm5, xmm6, xmm7, [eax], [ecx]
|
SSE2_StoreDiff8p xmm1, xmm5, xmm6, xmm7, [r0], [r2]
|
||||||
SSE2_StoreDiff8p xmm2, xmm5, xmm6, xmm7, [eax + ebx ], [ecx + edx]
|
SSE2_StoreDiff8p xmm2, xmm5, xmm6, xmm7, [r0 + r1 ], [r2 + r3]
|
||||||
|
|
||||||
add esi, 64
|
add r4, 64
|
||||||
lea eax, [eax + 2 * ebx]
|
lea r0, [r0 + 2 * r1]
|
||||||
lea ecx, [ecx + 2 * edx]
|
lea r2, [r2 + 2 * r3]
|
||||||
SSE2_Load4x8p esi, xmm0, xmm1, xmm4, xmm2, xmm5
|
SSE2_Load4x8p r4, xmm0, xmm1, xmm4, xmm2, xmm5
|
||||||
|
|
||||||
SSE2_TransTwo4x4W xmm0, xmm1, xmm4, xmm2, xmm3
|
SSE2_TransTwo4x4W xmm0, xmm1, xmm4, xmm2, xmm3
|
||||||
SSE2_IDCT xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm0
|
SSE2_IDCT xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm0
|
||||||
@@ -415,15 +434,15 @@ WelsIDctFourT4Rec_sse2:
|
|||||||
WELS_Zero xmm7
|
WELS_Zero xmm7
|
||||||
WELS_DW32 xmm6
|
WELS_DW32 xmm6
|
||||||
|
|
||||||
SSE2_StoreDiff8p xmm4, xmm5, xmm6, xmm7, [eax ], [ecx]
|
SSE2_StoreDiff8p xmm4, xmm5, xmm6, xmm7, [r0 ], [r2]
|
||||||
SSE2_StoreDiff8p xmm0, xmm5, xmm6, xmm7, [eax + ebx ], [ecx + edx]
|
SSE2_StoreDiff8p xmm0, xmm5, xmm6, xmm7, [r0 + r1 ], [r2 + r3]
|
||||||
lea eax, [eax + 2 * ebx]
|
lea r0, [r0 + 2 * r1]
|
||||||
lea ecx, [ecx + 2 * edx]
|
lea r2, [r2 + 2 * r3]
|
||||||
SSE2_StoreDiff8p xmm1, xmm5, xmm6, xmm7, [eax], [ecx]
|
SSE2_StoreDiff8p xmm1, xmm5, xmm6, xmm7, [r0], [r2]
|
||||||
SSE2_StoreDiff8p xmm2, xmm5, xmm6, xmm7, [eax + ebx], [ecx + edx]
|
SSE2_StoreDiff8p xmm2, xmm5, xmm6, xmm7, [r0 + r1], [r2 + r3]
|
||||||
|
LOAD_5_PARA_POP
|
||||||
pop esi
|
; pop esi
|
||||||
pop ebx
|
; pop ebx
|
||||||
ret
|
ret
|
||||||
|
|
||||||
%macro SSE2_StoreDiff4x8p 8
|
%macro SSE2_StoreDiff4x8p 8
|
||||||
@@ -438,54 +457,60 @@ WelsIDctFourT4Rec_sse2:
|
|||||||
;***********************************************************************
|
;***********************************************************************
|
||||||
WELS_EXTERN WelsIDctRecI16x16Dc_sse2
|
WELS_EXTERN WelsIDctRecI16x16Dc_sse2
|
||||||
ALIGN 16
|
ALIGN 16
|
||||||
%define pushsize 8
|
;%define pushsize 8
|
||||||
%define luma_dc esp + pushsize + 20
|
;%define luma_dc esp + pushsize + 20
|
||||||
WelsIDctRecI16x16Dc_sse2:
|
WelsIDctRecI16x16Dc_sse2:
|
||||||
push esi
|
%assign push_num 0
|
||||||
push edi
|
LOAD_5_PARA
|
||||||
|
%ifndef X86_32
|
||||||
|
movsx r1, r1d
|
||||||
|
movsx r3, r3d
|
||||||
|
%endif
|
||||||
|
; push esi
|
||||||
|
; push edi
|
||||||
|
|
||||||
mov ecx, [luma_dc]
|
;mov ecx, [luma_dc] ; r4
|
||||||
mov eax, [rec]
|
;mov eax, [rec] ; r0
|
||||||
mov edx, [stride]
|
;mov edx, [stride] ; r1
|
||||||
mov esi, [pred]
|
;mov esi, [pred]; r2
|
||||||
mov edi, [pred_stride]
|
;mov edi, [pred_stride]; r3
|
||||||
pxor xmm7, xmm7
|
pxor xmm7, xmm7
|
||||||
WELS_DW32 xmm6
|
WELS_DW32 xmm6
|
||||||
|
|
||||||
SSE2_Load8DC xmm0, xmm1, xmm2, xmm3, xmm6, [ecx]
|
SSE2_Load8DC xmm0, xmm1, xmm2, xmm3, xmm6, [r4]
|
||||||
SSE2_StoreDiff4x8p xmm0, xmm1, xmm5, xmm7, eax, esi, edx, edi
|
SSE2_StoreDiff4x8p xmm0, xmm1, xmm5, xmm7, r0, r2, r1, r3
|
||||||
|
|
||||||
lea eax, [eax + 2 * edx]
|
lea r0, [r0 + 2 * r1]
|
||||||
lea esi, [esi + 2 * edi]
|
lea r2, [r2 + 2 * r3]
|
||||||
SSE2_StoreDiff4x8p xmm0, xmm1, xmm5, xmm7, eax, esi, edx, edi
|
SSE2_StoreDiff4x8p xmm0, xmm1, xmm5, xmm7, r0, r2, r1, r3
|
||||||
|
|
||||||
lea eax, [eax + 2 * edx]
|
lea r0, [r0 + 2 * r1]
|
||||||
lea esi, [esi + 2 * edi]
|
lea r2, [r2 + 2 * r3]
|
||||||
SSE2_StoreDiff4x8p xmm2, xmm3, xmm5, xmm7, eax, esi, edx, edi
|
SSE2_StoreDiff4x8p xmm2, xmm3, xmm5, xmm7, r0, r2, r1, r3
|
||||||
|
|
||||||
lea eax, [eax + 2 * edx]
|
lea r0, [r0 + 2 * r1]
|
||||||
lea esi, [esi + 2 * edi]
|
lea r2, [r2 + 2 * r3]
|
||||||
SSE2_StoreDiff4x8p xmm2, xmm3, xmm5, xmm7, eax, esi, edx, edi
|
SSE2_StoreDiff4x8p xmm2, xmm3, xmm5, xmm7, r0, r2, r1, r3
|
||||||
|
|
||||||
SSE2_Load8DC xmm0, xmm1, xmm2, xmm3, xmm6, [ecx + 16]
|
SSE2_Load8DC xmm0, xmm1, xmm2, xmm3, xmm6, [r4 + 16]
|
||||||
lea eax, [eax + 2 * edx]
|
lea r0, [r0 + 2 * r1]
|
||||||
lea esi, [esi + 2 * edi]
|
lea r2, [r2 + 2 * r3]
|
||||||
SSE2_StoreDiff4x8p xmm0, xmm1, xmm5, xmm7, eax, esi, edx, edi
|
SSE2_StoreDiff4x8p xmm0, xmm1, xmm5, xmm7, r0, r2, r1, r3
|
||||||
|
|
||||||
lea eax, [eax + 2 * edx]
|
lea r0, [r0 + 2 * r1]
|
||||||
lea esi, [esi + 2 * edi]
|
lea r2, [r2 + 2 * r3]
|
||||||
SSE2_StoreDiff4x8p xmm0, xmm1, xmm5, xmm7, eax, esi, edx, edi
|
SSE2_StoreDiff4x8p xmm0, xmm1, xmm5, xmm7, r0, r2, r1, r3
|
||||||
|
|
||||||
lea eax, [eax + 2 * edx]
|
lea r0, [r0 + 2 * r1]
|
||||||
lea esi, [esi + 2 * edi]
|
lea r2, [r2 + 2 * r3]
|
||||||
SSE2_StoreDiff4x8p xmm2, xmm3, xmm5, xmm7, eax, esi, edx, edi
|
SSE2_StoreDiff4x8p xmm2, xmm3, xmm5, xmm7, r0, r2, r1, r3
|
||||||
|
|
||||||
lea eax, [eax + 2 * edx]
|
lea r0, [r0 + 2 * r1]
|
||||||
lea esi, [esi + 2 * edi]
|
lea r2, [r2 + 2 * r3]
|
||||||
SSE2_StoreDiff4x8p xmm2, xmm3, xmm5, xmm7, eax, esi, edx, edi
|
SSE2_StoreDiff4x8p xmm2, xmm3, xmm5, xmm7, r0, r2, r1, r3
|
||||||
|
LOAD_5_PARA_POP
|
||||||
pop edi
|
;pop edi
|
||||||
pop esi
|
;pop esi
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
|
||||||
@@ -503,17 +528,16 @@ WelsIDctRecI16x16Dc_sse2:
|
|||||||
movdqa %4, %1
|
movdqa %4, %1
|
||||||
psubd %4, %2
|
psubd %4, %2
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
%macro SSE2_Load4Col 5
|
%macro SSE2_Load4Col 5
|
||||||
movsx edx, WORD[%5]
|
movsx r2, WORD[%5]
|
||||||
movd %1, edx
|
movd %1, r2d
|
||||||
movsx edx, WORD[%5 + 0x20]
|
movsx r2, WORD[%5 + 0x20]
|
||||||
movd %2, edx
|
movd %2, r2d
|
||||||
punpckldq %1, %2
|
punpckldq %1, %2
|
||||||
movsx edx, WORD[%5 + 0x80]
|
movsx r2, WORD[%5 + 0x80]
|
||||||
movd %3, edx
|
movd %3, r2d
|
||||||
movsx edx, WORD[%5 + 0xa0]
|
movsx r2, WORD[%5 + 0xa0]
|
||||||
movd %4, edx
|
movd %4, r2d
|
||||||
punpckldq %3, %4
|
punpckldq %3, %4
|
||||||
punpcklqdq %1, %3
|
punpcklqdq %1, %3
|
||||||
%endmacro
|
%endmacro
|
||||||
@@ -523,13 +547,14 @@ WelsIDctRecI16x16Dc_sse2:
|
|||||||
;***********************************************************************
|
;***********************************************************************
|
||||||
WELS_EXTERN WelsHadamardT4Dc_sse2
|
WELS_EXTERN WelsHadamardT4Dc_sse2
|
||||||
WelsHadamardT4Dc_sse2:
|
WelsHadamardT4Dc_sse2:
|
||||||
mov eax, [esp + 4] ; luma_dc
|
;mov eax, [esp + 4] ; luma_dc
|
||||||
mov ecx, [esp + 8] ; pDct
|
;mov ecx, [esp + 8] ; pDct
|
||||||
|
%assign push_num 0
|
||||||
SSE2_Load4Col xmm1, xmm5, xmm6, xmm0, ecx
|
LOAD_2_PARA
|
||||||
SSE2_Load4Col xmm2, xmm5, xmm6, xmm0, ecx + 0x40
|
SSE2_Load4Col xmm1, xmm5, xmm6, xmm0, r1
|
||||||
SSE2_Load4Col xmm3, xmm5, xmm6, xmm0, ecx + 0x100
|
SSE2_Load4Col xmm2, xmm5, xmm6, xmm0, r1 + 0x40
|
||||||
SSE2_Load4Col xmm4, xmm5, xmm6, xmm0, ecx + 0x140
|
SSE2_Load4Col xmm3, xmm5, xmm6, xmm0, r1 + 0x100
|
||||||
|
SSE2_Load4Col xmm4, xmm5, xmm6, xmm0, r1 + 0x140
|
||||||
|
|
||||||
SSE2_SumSubD xmm1, xmm2, xmm7
|
SSE2_SumSubD xmm1, xmm2, xmm7
|
||||||
SSE2_SumSubD xmm3, xmm4, xmm7
|
SSE2_SumSubD xmm3, xmm4, xmm7
|
||||||
@@ -548,9 +573,7 @@ WelsHadamardT4Dc_sse2:
|
|||||||
|
|
||||||
packssdw xmm3, xmm4
|
packssdw xmm3, xmm4
|
||||||
packssdw xmm2, xmm1
|
packssdw xmm2, xmm1
|
||||||
movdqa [eax+ 0], xmm3
|
movdqa [r0+ 0], xmm3
|
||||||
movdqa [eax+16], xmm2
|
movdqa [r0+16], xmm2
|
||||||
|
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -1,653 +0,0 @@
|
|||||||
;*!
|
|
||||||
;* \copy
|
|
||||||
;* Copyright (c) 2009-2013, Cisco Systems
|
|
||||||
;* All rights reserved.
|
|
||||||
;*
|
|
||||||
;* Redistribution and use in source and binary forms, with or without
|
|
||||||
;* modification, are permitted provided that the following conditions
|
|
||||||
;* are met:
|
|
||||||
;*
|
|
||||||
;* * Redistributions of source code must retain the above copyright
|
|
||||||
;* notice, this list of conditions and the following disclaimer.
|
|
||||||
;*
|
|
||||||
;* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
;* notice, this list of conditions and the following disclaimer in
|
|
||||||
;* the documentation and/or other materials provided with the
|
|
||||||
;* distribution.
|
|
||||||
;*
|
|
||||||
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
||||||
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
|
||||||
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
;* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
;*
|
|
||||||
;*
|
|
||||||
;* expand_picture.asm
|
|
||||||
;*
|
|
||||||
;* Abstract
|
|
||||||
;* mmxext/sse for expand_frame
|
|
||||||
;*
|
|
||||||
;* History
|
|
||||||
;* 09/25/2009 Created
|
|
||||||
;*
|
|
||||||
;*
|
|
||||||
;*************************************************************************/
|
|
||||||
|
|
||||||
%include "asm_inc.asm"
|
|
||||||
|
|
||||||
BITS 32
|
|
||||||
|
|
||||||
;***********************************************************************
|
|
||||||
; Macros and other preprocessor constants
|
|
||||||
;***********************************************************************
|
|
||||||
|
|
||||||
;***********************************************************************
|
|
||||||
; Local Data (Read Only)
|
|
||||||
;***********************************************************************
|
|
||||||
|
|
||||||
;SECTION .rodata pData align=16
|
|
||||||
|
|
||||||
;***********************************************************************
|
|
||||||
; Various memory constants (trigonometric values or rounding values)
|
|
||||||
;***********************************************************************
|
|
||||||
;%define PADDING_SIZE_ASM 32 ; PADDING_LENGTH
|
|
||||||
|
|
||||||
;***********************************************************************
|
|
||||||
; Code
|
|
||||||
;***********************************************************************
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
SECTION .text
|
|
||||||
|
|
||||||
WELS_EXTERN ExpandPictureLuma_sse2
|
|
||||||
WELS_EXTERN ExpandPictureChromaAlign_sse2 ; for chroma alignment
|
|
||||||
WELS_EXTERN ExpandPictureChromaUnalign_sse2 ; for chroma unalignment
|
|
||||||
|
|
||||||
;;;;;;;expanding result;;;;;;;
|
|
||||||
|
|
||||||
;aaaa|attttttttttttttttb|bbbb
|
|
||||||
;aaaa|attttttttttttttttb|bbbb
|
|
||||||
;aaaa|attttttttttttttttb|bbbb
|
|
||||||
;aaaa|attttttttttttttttb|bbbb
|
|
||||||
;----------------------------
|
|
||||||
;aaaa|attttttttttttttttb|bbbb
|
|
||||||
;llll|l r|rrrr
|
|
||||||
;llll|l r|rrrr
|
|
||||||
;llll|l r|rrrr
|
|
||||||
;llll|l r|rrrr
|
|
||||||
;llll|l r|rrrr
|
|
||||||
;cccc|ceeeeeeeeeeeeeeeed|dddd
|
|
||||||
;----------------------------
|
|
||||||
;cccc|ceeeeeeeeeeeeeeeed|dddd
|
|
||||||
;cccc|ceeeeeeeeeeeeeeeed|dddd
|
|
||||||
;cccc|ceeeeeeeeeeeeeeeed|dddd
|
|
||||||
;cccc|ceeeeeeeeeeeeeeeed|dddd
|
|
||||||
|
|
||||||
%macro mov_line_8x4_mmx 3 ; dst, stride, mm?
|
|
||||||
movq [%1], %3
|
|
||||||
movq [%1+%2], %3
|
|
||||||
lea %1, [%1+2*%2]
|
|
||||||
movq [%1], %3
|
|
||||||
movq [%1+%2], %3
|
|
||||||
lea %1, [%1+2*%2]
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
%macro mov_line_end8x4_mmx 3 ; dst, stride, mm?
|
|
||||||
movq [%1], %3
|
|
||||||
movq [%1+%2], %3
|
|
||||||
lea %1, [%1+2*%2]
|
|
||||||
movq [%1], %3
|
|
||||||
movq [%1+%2], %3
|
|
||||||
lea %1, [%1+%2]
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
%macro mov_line_16x4_sse2 4 ; dst, stride, xmm?, u/a
|
|
||||||
movdq%4 [%1], %3 ; top(bottom)_0
|
|
||||||
movdq%4 [%1+%2], %3 ; top(bottom)_1
|
|
||||||
lea %1, [%1+2*%2]
|
|
||||||
movdq%4 [%1], %3 ; top(bottom)_2
|
|
||||||
movdq%4 [%1+%2], %3 ; top(bottom)_3
|
|
||||||
lea %1, [%1+2*%2]
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
%macro mov_line_end16x4_sse2 4 ; dst, stride, xmm?, u/a
|
|
||||||
movdq%4 [%1], %3 ; top(bottom)_0
|
|
||||||
movdq%4 [%1+%2], %3 ; top(bottom)_1
|
|
||||||
lea %1, [%1+2*%2]
|
|
||||||
movdq%4 [%1], %3 ; top(bottom)_2
|
|
||||||
movdq%4 [%1+%2], %3 ; top(bottom)_3
|
|
||||||
lea %1, [%1+%2]
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
%macro mov_line_32x4_sse2 3 ; dst, stride, xmm?
|
|
||||||
movdqa [%1], %3 ; top(bottom)_0
|
|
||||||
movdqa [%1+16], %3 ; top(bottom)_0
|
|
||||||
movdqa [%1+%2], %3 ; top(bottom)_1
|
|
||||||
movdqa [%1+%2+16], %3 ; top(bottom)_1
|
|
||||||
lea %1, [%1+2*%2]
|
|
||||||
movdqa [%1], %3 ; top(bottom)_2
|
|
||||||
movdqa [%1+16], %3 ; top(bottom)_2
|
|
||||||
movdqa [%1+%2], %3 ; top(bottom)_3
|
|
||||||
movdqa [%1+%2+16], %3 ; top(bottom)_3
|
|
||||||
lea %1, [%1+2*%2]
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
%macro mov_line_end32x4_sse2 3 ; dst, stride, xmm?
|
|
||||||
movdqa [%1], %3 ; top(bottom)_0
|
|
||||||
movdqa [%1+16], %3 ; top(bottom)_0
|
|
||||||
movdqa [%1+%2], %3 ; top(bottom)_1
|
|
||||||
movdqa [%1+%2+16], %3 ; top(bottom)_1
|
|
||||||
lea %1, [%1+2*%2]
|
|
||||||
movdqa [%1], %3 ; top(bottom)_2
|
|
||||||
movdqa [%1+16], %3 ; top(bottom)_2
|
|
||||||
movdqa [%1+%2], %3 ; top(bottom)_3
|
|
||||||
movdqa [%1+%2+16], %3 ; top(bottom)_3
|
|
||||||
lea %1, [%1+%2]
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
%macro exp_top_bottom_sse2 1 ; iPaddingSize [luma(32)/chroma(16)]
|
|
||||||
; ebx [width/16(8)]
|
|
||||||
; esi [pSrc+0], edi [pSrc-1], ecx [-stride], 32(16) ; top
|
|
||||||
; eax [pSrc+(h-1)*stride], ebp [pSrc+(h+31)*stride], 32(16) ; bottom
|
|
||||||
|
|
||||||
%if %1 == 32 ; for luma
|
|
||||||
sar ebx, 04h ; width / 16(8) pixels
|
|
||||||
.top_bottom_loops:
|
|
||||||
; top
|
|
||||||
movdqa xmm0, [esi] ; first line of picture pData
|
|
||||||
mov_line_16x4_sse2 edi, ecx, xmm0, a ; dst, stride, xmm?
|
|
||||||
mov_line_16x4_sse2 edi, ecx, xmm0, a
|
|
||||||
mov_line_16x4_sse2 edi, ecx, xmm0, a
|
|
||||||
mov_line_16x4_sse2 edi, ecx, xmm0, a
|
|
||||||
mov_line_16x4_sse2 edi, ecx, xmm0, a ; dst, stride, xmm?
|
|
||||||
mov_line_16x4_sse2 edi, ecx, xmm0, a
|
|
||||||
mov_line_16x4_sse2 edi, ecx, xmm0, a
|
|
||||||
mov_line_end16x4_sse2 edi, ecx, xmm0, a
|
|
||||||
|
|
||||||
; bottom
|
|
||||||
movdqa xmm1, [eax] ; last line of picture pData
|
|
||||||
mov_line_16x4_sse2 ebp, ecx, xmm1, a ; dst, stride, xmm?
|
|
||||||
mov_line_16x4_sse2 ebp, ecx, xmm1, a
|
|
||||||
mov_line_16x4_sse2 ebp, ecx, xmm1, a
|
|
||||||
mov_line_16x4_sse2 ebp, ecx, xmm1, a
|
|
||||||
mov_line_16x4_sse2 ebp, ecx, xmm1, a ; dst, stride, xmm?
|
|
||||||
mov_line_16x4_sse2 ebp, ecx, xmm1, a
|
|
||||||
mov_line_16x4_sse2 ebp, ecx, xmm1, a
|
|
||||||
mov_line_end16x4_sse2 ebp, ecx, xmm1, a
|
|
||||||
|
|
||||||
lea esi, [esi+16] ; top pSrc
|
|
||||||
lea edi, [edi+16] ; top dst
|
|
||||||
lea eax, [eax+16] ; bottom pSrc
|
|
||||||
lea ebp, [ebp+16] ; bottom dst
|
|
||||||
neg ecx ; positive/negative stride need for next loop?
|
|
||||||
|
|
||||||
dec ebx
|
|
||||||
jnz near .top_bottom_loops
|
|
||||||
%elif %1 == 16 ; for chroma ??
|
|
||||||
mov edx, ebx
|
|
||||||
sar ebx, 04h ; (width / 16) pixels
|
|
||||||
.top_bottom_loops:
|
|
||||||
; top
|
|
||||||
movdqa xmm0, [esi] ; first line of picture pData
|
|
||||||
mov_line_16x4_sse2 edi, ecx, xmm0, a ; dst, stride, xmm?
|
|
||||||
mov_line_16x4_sse2 edi, ecx, xmm0, a
|
|
||||||
mov_line_16x4_sse2 edi, ecx, xmm0, a
|
|
||||||
mov_line_end16x4_sse2 edi, ecx, xmm0, a
|
|
||||||
|
|
||||||
; bottom
|
|
||||||
movdqa xmm1, [eax] ; last line of picture pData
|
|
||||||
mov_line_16x4_sse2 ebp, ecx, xmm1, a ; dst, stride, xmm?
|
|
||||||
mov_line_16x4_sse2 ebp, ecx, xmm1, a
|
|
||||||
mov_line_16x4_sse2 ebp, ecx, xmm1, a
|
|
||||||
mov_line_end16x4_sse2 ebp, ecx, xmm1, a
|
|
||||||
|
|
||||||
lea esi, [esi+16] ; top pSrc
|
|
||||||
lea edi, [edi+16] ; top dst
|
|
||||||
lea eax, [eax+16] ; bottom pSrc
|
|
||||||
lea ebp, [ebp+16] ; bottom dst
|
|
||||||
neg ecx ; positive/negative stride need for next loop?
|
|
||||||
|
|
||||||
dec ebx
|
|
||||||
jnz near .top_bottom_loops
|
|
||||||
|
|
||||||
; for remaining 8 bytes
|
|
||||||
and edx, 0fh ; any 8 bytes left?
|
|
||||||
test edx, edx
|
|
||||||
jz near .to_be_continued ; no left to exit here
|
|
||||||
|
|
||||||
; top
|
|
||||||
movq mm0, [esi] ; remained 8 byte
|
|
||||||
mov_line_8x4_mmx edi, ecx, mm0 ; dst, stride, mm?
|
|
||||||
mov_line_8x4_mmx edi, ecx, mm0 ; dst, stride, mm?
|
|
||||||
mov_line_8x4_mmx edi, ecx, mm0 ; dst, stride, mm?
|
|
||||||
mov_line_end8x4_mmx edi, ecx, mm0 ; dst, stride, mm?
|
|
||||||
; bottom
|
|
||||||
movq mm1, [eax]
|
|
||||||
mov_line_8x4_mmx ebp, ecx, mm1 ; dst, stride, mm?
|
|
||||||
mov_line_8x4_mmx ebp, ecx, mm1 ; dst, stride, mm?
|
|
||||||
mov_line_8x4_mmx ebp, ecx, mm1 ; dst, stride, mm?
|
|
||||||
mov_line_end8x4_mmx ebp, ecx, mm1 ; dst, stride, mm?
|
|
||||||
WELSEMMS
|
|
||||||
|
|
||||||
.to_be_continued:
|
|
||||||
%endif
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
%macro exp_left_right_sse2 2 ; iPaddingSize [luma(32)/chroma(16)], u/a
|
|
||||||
; ecx [height]
|
|
||||||
; esi [pSrc+0], edi [pSrc-32], edx [stride], 32(16) ; left
|
|
||||||
; ebx [pSrc+(w-1)], ebp [pSrc+w], 32(16) ; right
|
|
||||||
; xor eax, eax ; for pixel pData (uint8_t) ; make sure eax=0 at least high 24 bits of eax = 0
|
|
||||||
|
|
||||||
%if %1 == 32 ; for luma
|
|
||||||
.left_right_loops:
|
|
||||||
; left
|
|
||||||
mov al, byte [esi] ; pixel pData for left border
|
|
||||||
butterfly_1to16_sse xmm0, xmm1, a ; dst, tmp, pSrc [generic register name: a/b/c/d]
|
|
||||||
movdqa [edi], xmm0
|
|
||||||
movdqa [edi+16], xmm0
|
|
||||||
|
|
||||||
; right
|
|
||||||
mov al, byte [ebx]
|
|
||||||
butterfly_1to16_sse xmm1, xmm2, a ; dst, tmp, pSrc [generic register name: a/b/c/d]
|
|
||||||
movdqa [ebp], xmm1
|
|
||||||
movdqa [ebp+16], xmm1
|
|
||||||
|
|
||||||
lea esi, [esi+edx] ; left pSrc
|
|
||||||
lea edi, [edi+edx] ; left dst
|
|
||||||
lea ebx, [ebx+edx] ; right pSrc
|
|
||||||
lea ebp, [ebp+edx] ; right dst
|
|
||||||
|
|
||||||
dec ecx
|
|
||||||
jnz near .left_right_loops
|
|
||||||
%elif %1 == 16 ; for chroma ??
|
|
||||||
.left_right_loops:
|
|
||||||
; left
|
|
||||||
mov al, byte [esi] ; pixel pData for left border
|
|
||||||
butterfly_1to16_sse xmm0, xmm1, a ; dst, tmp, pSrc [generic register name: a/b/c/d]
|
|
||||||
movdqa [edi], xmm0
|
|
||||||
|
|
||||||
; right
|
|
||||||
mov al, byte [ebx]
|
|
||||||
butterfly_1to16_sse xmm1, xmm2, a ; dst, tmp, pSrc [generic register name: a/b/c/d]
|
|
||||||
movdq%2 [ebp], xmm1 ; might not be aligned 16 bytes in case chroma planes
|
|
||||||
|
|
||||||
lea esi, [esi+edx] ; left pSrc
|
|
||||||
lea edi, [edi+edx] ; left dst
|
|
||||||
lea ebx, [ebx+edx] ; right pSrc
|
|
||||||
lea ebp, [ebp+edx] ; right dst
|
|
||||||
|
|
||||||
dec ecx
|
|
||||||
jnz near .left_right_loops
|
|
||||||
%endif
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
%macro exp_cross_sse2 2 ; iPaddingSize [luma(32)/chroma(16)], u/a
|
|
||||||
; top-left: (x)mm3, top-right: (x)mm4, bottom-left: (x)mm5, bottom-right: (x)mm6
|
|
||||||
; edi: TL, ebp: TR, eax: BL, ebx: BR, ecx, -stride
|
|
||||||
%if %1 == 32 ; luma
|
|
||||||
; TL
|
|
||||||
mov_line_32x4_sse2 edi, ecx, xmm3 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 edi, ecx, xmm3 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 edi, ecx, xmm3 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 edi, ecx, xmm3 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 edi, ecx, xmm3 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 edi, ecx, xmm3 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 edi, ecx, xmm3 ; dst, stride, xmm?
|
|
||||||
mov_line_end32x4_sse2 edi, ecx, xmm3 ; dst, stride, xmm?
|
|
||||||
|
|
||||||
; TR
|
|
||||||
mov_line_32x4_sse2 ebp, ecx, xmm4 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 ebp, ecx, xmm4 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 ebp, ecx, xmm4 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 ebp, ecx, xmm4 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 ebp, ecx, xmm4 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 ebp, ecx, xmm4 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 ebp, ecx, xmm4 ; dst, stride, xmm?
|
|
||||||
mov_line_end32x4_sse2 ebp, ecx, xmm4 ; dst, stride, xmm?
|
|
||||||
|
|
||||||
; BL
|
|
||||||
mov_line_32x4_sse2 eax, ecx, xmm5 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 eax, ecx, xmm5 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 eax, ecx, xmm5 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 eax, ecx, xmm5 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 eax, ecx, xmm5 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 eax, ecx, xmm5 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 eax, ecx, xmm5 ; dst, stride, xmm?
|
|
||||||
mov_line_end32x4_sse2 eax, ecx, xmm5 ; dst, stride, xmm?
|
|
||||||
|
|
||||||
; BR
|
|
||||||
mov_line_32x4_sse2 ebx, ecx, xmm6 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 ebx, ecx, xmm6 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 ebx, ecx, xmm6 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 ebx, ecx, xmm6 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 ebx, ecx, xmm6 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 ebx, ecx, xmm6 ; dst, stride, xmm?
|
|
||||||
mov_line_32x4_sse2 ebx, ecx, xmm6 ; dst, stride, xmm?
|
|
||||||
mov_line_end32x4_sse2 ebx, ecx, xmm6 ; dst, stride, xmm?
|
|
||||||
%elif %1 == 16 ; chroma
|
|
||||||
; TL
|
|
||||||
mov_line_16x4_sse2 edi, ecx, xmm3, a ; dst, stride, xmm?
|
|
||||||
mov_line_16x4_sse2 edi, ecx, xmm3, a ; dst, stride, xmm?
|
|
||||||
mov_line_16x4_sse2 edi, ecx, xmm3, a ; dst, stride, xmm?
|
|
||||||
mov_line_end16x4_sse2 edi, ecx, xmm3, a ; dst, stride, xmm?
|
|
||||||
|
|
||||||
; TR
|
|
||||||
mov_line_16x4_sse2 ebp, ecx, xmm4, %2 ; dst, stride, xmm?
|
|
||||||
mov_line_16x4_sse2 ebp, ecx, xmm4, %2 ; dst, stride, xmm?
|
|
||||||
mov_line_16x4_sse2 ebp, ecx, xmm4, %2 ; dst, stride, xmm?
|
|
||||||
mov_line_end16x4_sse2 ebp, ecx, xmm4, %2 ; dst, stride, xmm?
|
|
||||||
|
|
||||||
; BL
|
|
||||||
mov_line_16x4_sse2 eax, ecx, xmm5, a ; dst, stride, xmm?
|
|
||||||
mov_line_16x4_sse2 eax, ecx, xmm5, a ; dst, stride, xmm?
|
|
||||||
mov_line_16x4_sse2 eax, ecx, xmm5, a ; dst, stride, xmm?
|
|
||||||
mov_line_end16x4_sse2 eax, ecx, xmm5, a ; dst, stride, xmm?
|
|
||||||
|
|
||||||
; BR
|
|
||||||
mov_line_16x4_sse2 ebx, ecx, xmm6, %2 ; dst, stride, xmm?
|
|
||||||
mov_line_16x4_sse2 ebx, ecx, xmm6, %2 ; dst, stride, xmm?
|
|
||||||
mov_line_16x4_sse2 ebx, ecx, xmm6, %2 ; dst, stride, xmm?
|
|
||||||
mov_line_end16x4_sse2 ebx, ecx, xmm6, %2 ; dst, stride, xmm?
|
|
||||||
%endif
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
;***********************************************************************----------------
|
|
||||||
; void ExpandPictureLuma_sse2( uint8_t *pDst,
|
|
||||||
; const int32_t iStride,
|
|
||||||
; const int32_t iWidth,
|
|
||||||
; const int32_t iHeight );
|
|
||||||
;***********************************************************************----------------
|
|
||||||
ExpandPictureLuma_sse2:
|
|
||||||
push ebx
|
|
||||||
push edx
|
|
||||||
push esi
|
|
||||||
push edi
|
|
||||||
push ebp
|
|
||||||
|
|
||||||
; for both top and bottom border
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
||||||
mov esi, [esp+24] ; p_dst
|
|
||||||
mov edx, [esp+28] ; stride
|
|
||||||
mov ebx, [esp+32] ; width
|
|
||||||
mov eax, [esp+36] ; height
|
|
||||||
; also prepare for cross border pData top-left: xmm3
|
|
||||||
; xor ecx, ecx
|
|
||||||
mov cl, byte [esi]
|
|
||||||
butterfly_1to16_sse xmm3, xmm4, c ; dst, tmp, pSrc [generic register name: a/b/c/d]
|
|
||||||
; load top border
|
|
||||||
mov ecx, edx ; stride
|
|
||||||
neg ecx ; -stride
|
|
||||||
lea edi, [esi+ecx] ; last line of top border
|
|
||||||
; load bottom border
|
|
||||||
dec eax ; h-1
|
|
||||||
imul eax, edx ; (h-1)*stride
|
|
||||||
lea eax, [esi+eax] ; last line of picture pData
|
|
||||||
sal edx, 05h ; 32*stride
|
|
||||||
lea ebp, [eax+edx] ; last line of bottom border, (h-1)*stride + 32 * stride
|
|
||||||
; also prepare for cross border pData: bottom-left with xmm5, bottom-right xmm6
|
|
||||||
dec ebx ; width-1
|
|
||||||
lea ebx, [eax+ebx] ; dst[w-1][h-1]
|
|
||||||
; xor edx, edx
|
|
||||||
mov dl, byte [eax] ; bottom-left
|
|
||||||
butterfly_1to16_sse xmm5, xmm6, d ; dst, tmp, pSrc [generic register name: a/b/c/d]
|
|
||||||
mov dl, byte [ebx] ; bottom-right
|
|
||||||
butterfly_1to16_sse xmm6, xmm4, d ; dst, tmp, pSrc [generic register name: a/b/c/d]
|
|
||||||
; for top & bottom expanding
|
|
||||||
mov ebx, [esp+32] ; width
|
|
||||||
exp_top_bottom_sse2 32
|
|
||||||
|
|
||||||
; for both left and right border
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
||||||
mov esi, [esp+24] ; p_dst: left border pSrc
|
|
||||||
mov edx, [esp+28] ; stride
|
|
||||||
mov ebx, [esp+32] ; width
|
|
||||||
mov ecx, [esp+36] ; height
|
|
||||||
; load left border
|
|
||||||
mov eax, -32 ; luma=-32, chroma=-16
|
|
||||||
lea edi, [esi+eax] ; left border dst
|
|
||||||
dec ebx
|
|
||||||
lea ebx, [esi+ebx] ; right border pSrc, (p_dst + width - 1)
|
|
||||||
lea ebp, [ebx+1] ; right border dst
|
|
||||||
; prepare for cross border pData: top-right with xmm4
|
|
||||||
; xor eax, eax
|
|
||||||
mov al, byte [ebx] ; top-right
|
|
||||||
butterfly_1to16_sse xmm4, xmm0, a ; dst, tmp, pSrc [generic register name: a/b/c/d]
|
|
||||||
; for left & right border expanding
|
|
||||||
exp_left_right_sse2 32, a
|
|
||||||
|
|
||||||
; for cross border [top-left, top-right, bottom-left, bottom-right]
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
||||||
mov esi, [esp+24] ; p_dst
|
|
||||||
mov ecx, [esp+28] ; stride
|
|
||||||
mov ebx, [esp+32] ; width
|
|
||||||
mov edx, [esp+36] ; height
|
|
||||||
; have done xmm3,..,xmm6 cross pData initialization above, perform pading as below, To be continued..
|
|
||||||
mov eax, -32 ; luma=-32, chroma=-16
|
|
||||||
neg ecx ; -stride
|
|
||||||
lea edi, [esi+eax]
|
|
||||||
lea edi, [edi+ecx] ; last line of top-left border
|
|
||||||
lea ebp, [esi+ebx]
|
|
||||||
lea ebp, [ebp+ecx] ; last line of top-right border
|
|
||||||
add edx, 32 ; height+32(16), luma=32, chroma=16
|
|
||||||
mov ecx, [esp+28] ; stride
|
|
||||||
imul edx, ecx ; (height+32(16)) * stride
|
|
||||||
lea eax, [edi+edx] ; last line of bottom-left border
|
|
||||||
lea ebx, [ebp+edx] ; last line of bottom-right border
|
|
||||||
neg ecx ; -stride
|
|
||||||
; for left & right border expanding
|
|
||||||
exp_cross_sse2 32, a
|
|
||||||
|
|
||||||
; sfence ; commit cache write back memory
|
|
||||||
|
|
||||||
pop ebp
|
|
||||||
pop edi
|
|
||||||
pop esi
|
|
||||||
pop edx
|
|
||||||
pop ebx
|
|
||||||
|
|
||||||
ret
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
;***********************************************************************----------------
|
|
||||||
; void ExpandPictureChromaAlign_sse2( uint8_t *pDst,
|
|
||||||
; const int32_t iStride,
|
|
||||||
; const int32_t iWidth,
|
|
||||||
; const int32_t iHeight );
|
|
||||||
;***********************************************************************----------------
|
|
||||||
ExpandPictureChromaAlign_sse2:
|
|
||||||
push ebx
|
|
||||||
push edx
|
|
||||||
push esi
|
|
||||||
push edi
|
|
||||||
push ebp
|
|
||||||
|
|
||||||
; for both top and bottom border
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
||||||
mov esi, [esp+24] ; p_dst
|
|
||||||
mov edx, [esp+28] ; stride
|
|
||||||
mov ebx, [esp+32] ; width
|
|
||||||
mov eax, [esp+36] ; height
|
|
||||||
; also prepare for cross border pData top-left: xmm3
|
|
||||||
; xor ecx, ecx
|
|
||||||
mov cl, byte [esi]
|
|
||||||
butterfly_1to16_sse xmm3, xmm4, c ; dst, tmp, pSrc [generic register name: a/b/c/d]
|
|
||||||
; load top border
|
|
||||||
mov ecx, edx ; stride
|
|
||||||
neg ecx ; -stride
|
|
||||||
lea edi, [esi+ecx] ; last line of top border
|
|
||||||
; load bottom border
|
|
||||||
dec eax ; h-1
|
|
||||||
imul eax, edx ; (h-1)*stride
|
|
||||||
lea eax, [esi+eax] ; last line of picture pData
|
|
||||||
sal edx, 04h ; 16*stride
|
|
||||||
lea ebp, [eax+edx] ; last line of bottom border, (h-1)*stride + 16 * stride
|
|
||||||
; also prepare for cross border pData: bottom-left with xmm5, bottom-right xmm6
|
|
||||||
dec ebx ; width-1
|
|
||||||
lea ebx, [eax+ebx] ; dst[w-1][h-1]
|
|
||||||
; xor edx, edx
|
|
||||||
mov dl, byte [eax] ; bottom-left
|
|
||||||
butterfly_1to16_sse xmm5, xmm6, d ; dst, tmp, pSrc [generic register name: a/b/c/d]
|
|
||||||
mov dl, byte [ebx] ; bottom-right
|
|
||||||
butterfly_1to16_sse xmm6, xmm4, d ; dst, tmp, pSrc [generic register name: a/b/c/d]
|
|
||||||
; for top & bottom expanding
|
|
||||||
mov ebx, [esp+32] ; width
|
|
||||||
exp_top_bottom_sse2 16
|
|
||||||
|
|
||||||
; for both left and right border
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
||||||
mov esi, [esp+24] ; p_dst: left border pSrc
|
|
||||||
mov edx, [esp+28] ; stride
|
|
||||||
mov ebx, [esp+32] ; width
|
|
||||||
mov ecx, [esp+36] ; height
|
|
||||||
; load left border
|
|
||||||
mov eax, -16 ; luma=-32, chroma=-16
|
|
||||||
lea edi, [esi+eax] ; left border dst
|
|
||||||
dec ebx
|
|
||||||
lea ebx, [esi+ebx] ; right border pSrc, (p_dst + width - 1)
|
|
||||||
lea ebp, [ebx+1] ; right border dst
|
|
||||||
; prepare for cross border pData: top-right with xmm4
|
|
||||||
; xor eax, eax
|
|
||||||
mov al, byte [ebx] ; top-right
|
|
||||||
butterfly_1to16_sse xmm4, xmm0, a ; dst, tmp, pSrc [generic register name: a/b/c/d]
|
|
||||||
; for left & right border expanding
|
|
||||||
exp_left_right_sse2 16, a
|
|
||||||
|
|
||||||
; for cross border [top-left, top-right, bottom-left, bottom-right]
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
||||||
mov esi, [esp+24] ; p_dst
|
|
||||||
mov ecx, [esp+28] ; stride
|
|
||||||
mov ebx, [esp+32] ; width
|
|
||||||
mov edx, [esp+36] ; height
|
|
||||||
; have done xmm3,..,xmm6 cross pData initialization above, perform pading as below, To be continued..
|
|
||||||
mov eax, -16 ; chroma=-16
|
|
||||||
neg ecx ; -stride
|
|
||||||
lea edi, [esi+eax]
|
|
||||||
lea edi, [edi+ecx] ; last line of top-left border
|
|
||||||
lea ebp, [esi+ebx]
|
|
||||||
lea ebp, [ebp+ecx] ; last line of top-right border
|
|
||||||
mov ecx, [esp+28] ; stride
|
|
||||||
add edx, 16 ; height+16, luma=32, chroma=16
|
|
||||||
imul edx, ecx ; (height+16) * stride
|
|
||||||
lea eax, [edi+edx] ; last line of bottom-left border
|
|
||||||
lea ebx, [ebp+edx] ; last line of bottom-right border
|
|
||||||
neg ecx ; -stride
|
|
||||||
; for left & right border expanding
|
|
||||||
exp_cross_sse2 16, a
|
|
||||||
|
|
||||||
; sfence ; commit cache write back memory
|
|
||||||
|
|
||||||
pop ebp
|
|
||||||
pop edi
|
|
||||||
pop esi
|
|
||||||
pop edx
|
|
||||||
pop ebx
|
|
||||||
|
|
||||||
ret
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
;***********************************************************************----------------
|
|
||||||
; void ExpandPictureChromaUnalign_sse2( uint8_t *pDst,
|
|
||||||
; const int32_t iStride,
|
|
||||||
; const int32_t iWidth,
|
|
||||||
; const int32_t iHeight );
|
|
||||||
;***********************************************************************----------------
|
|
||||||
ExpandPictureChromaUnalign_sse2:
|
|
||||||
push ebx
|
|
||||||
push edx
|
|
||||||
push esi
|
|
||||||
push edi
|
|
||||||
push ebp
|
|
||||||
|
|
||||||
; for both top and bottom border
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
||||||
mov esi, [esp+24] ; p_dst
|
|
||||||
mov edx, [esp+28] ; stride
|
|
||||||
mov ebx, [esp+32] ; width
|
|
||||||
mov eax, [esp+36] ; height
|
|
||||||
; also prepare for cross border pData top-left: xmm3
|
|
||||||
; xor ecx, ecx
|
|
||||||
mov cl, byte [esi]
|
|
||||||
butterfly_1to16_sse xmm3, xmm4, c ; dst, tmp, pSrc [generic register name: a/b/c/d]
|
|
||||||
; load top border
|
|
||||||
mov ecx, edx ; stride
|
|
||||||
neg ecx ; -stride
|
|
||||||
lea edi, [esi+ecx] ; last line of top border
|
|
||||||
; load bottom border
|
|
||||||
dec eax ; h-1
|
|
||||||
imul eax, edx ; (h-1)*stride
|
|
||||||
lea eax, [esi+eax] ; last line of picture pData
|
|
||||||
sal edx, 04h ; 16*stride
|
|
||||||
lea ebp, [eax+edx] ; last line of bottom border, (h-1)*stride + 16 * stride
|
|
||||||
; also prepare for cross border pData: bottom-left with xmm5, bottom-right xmm6
|
|
||||||
dec ebx ; width-1
|
|
||||||
lea ebx, [eax+ebx] ; dst[w-1][h-1]
|
|
||||||
; xor edx, edx
|
|
||||||
mov dl, byte [eax] ; bottom-left
|
|
||||||
butterfly_1to16_sse xmm5, xmm6, d ; dst, tmp, pSrc [generic register name: a/b/c/d]
|
|
||||||
mov dl, byte [ebx] ; bottom-right
|
|
||||||
butterfly_1to16_sse xmm6, xmm4, d ; dst, tmp, pSrc [generic register name: a/b/c/d]
|
|
||||||
; for top & bottom expanding
|
|
||||||
mov ebx, [esp+32] ; width
|
|
||||||
exp_top_bottom_sse2 16
|
|
||||||
|
|
||||||
; for both left and right border
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
||||||
mov esi, [esp+24] ; p_dst: left border pSrc
|
|
||||||
mov edx, [esp+28] ; stride
|
|
||||||
mov ebx, [esp+32] ; width
|
|
||||||
mov ecx, [esp+36] ; height
|
|
||||||
; load left border
|
|
||||||
mov eax, -16 ; luma=-32, chroma=-16
|
|
||||||
lea edi, [esi+eax] ; left border dst
|
|
||||||
dec ebx
|
|
||||||
lea ebx, [esi+ebx] ; right border pSrc, (p_dst + width - 1)
|
|
||||||
lea ebp, [ebx+1] ; right border dst
|
|
||||||
; prepare for cross border pData: top-right with xmm4
|
|
||||||
; xor eax, eax
|
|
||||||
mov al, byte [ebx] ; top-right
|
|
||||||
butterfly_1to16_sse xmm4, xmm0, a ; dst, tmp, pSrc [generic register name: a/b/c/d]
|
|
||||||
; for left & right border expanding
|
|
||||||
exp_left_right_sse2 16, u
|
|
||||||
|
|
||||||
; for cross border [top-left, top-right, bottom-left, bottom-right]
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
||||||
mov esi, [esp+24] ; p_dst
|
|
||||||
mov ecx, [esp+28] ; stride
|
|
||||||
mov ebx, [esp+32] ; width
|
|
||||||
mov edx, [esp+36] ; height
|
|
||||||
; have done xmm3,..,xmm6 cross pData initialization above, perform pading as below, To be continued..
|
|
||||||
neg ecx ; -stride
|
|
||||||
mov eax, -16 ; chroma=-16
|
|
||||||
lea edi, [esi+eax]
|
|
||||||
lea edi, [edi+ecx] ; last line of top-left border
|
|
||||||
lea ebp, [esi+ebx]
|
|
||||||
lea ebp, [ebp+ecx] ; last line of top-right border
|
|
||||||
mov ecx, [esp+28] ; stride
|
|
||||||
add edx, 16 ; height+16, luma=32, chroma=16
|
|
||||||
imul edx, ecx ; (height+16) * stride
|
|
||||||
lea eax, [edi+edx] ; last line of bottom-left border
|
|
||||||
lea ebx, [ebp+edx] ; last line of bottom-right border
|
|
||||||
neg ecx ; -stride
|
|
||||||
; for left & right border expanding
|
|
||||||
exp_cross_sse2 16, u
|
|
||||||
|
|
||||||
; sfence ; commit cache write back memory
|
|
||||||
|
|
||||||
pop ebp
|
|
||||||
pop edi
|
|
||||||
pop esi
|
|
||||||
pop edx
|
|
||||||
pop ebx
|
|
||||||
|
|
||||||
ret
|
|
||||||
|
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -1,156 +0,0 @@
|
|||||||
;*!
|
|
||||||
;* \copy
|
|
||||||
;* Copyright (c) 2009-2013, Cisco Systems
|
|
||||||
;* All rights reserved.
|
|
||||||
;*
|
|
||||||
;* Redistribution and use in source and binary forms, with or without
|
|
||||||
;* modification, are permitted provided that the following conditions
|
|
||||||
;* are met:
|
|
||||||
;*
|
|
||||||
;* * Redistributions of source code must retain the above copyright
|
|
||||||
;* notice, this list of conditions and the following disclaimer.
|
|
||||||
;*
|
|
||||||
;* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
;* notice, this list of conditions and the following disclaimer in
|
|
||||||
;* the documentation and/or other materials provided with the
|
|
||||||
;* distribution.
|
|
||||||
;*
|
|
||||||
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
||||||
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
|
||||||
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
;* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
;*
|
|
||||||
;*
|
|
||||||
;* intra_pred_util.asm
|
|
||||||
;*
|
|
||||||
;* Abstract
|
|
||||||
;* mmxext/sse for WelsFillingPred8to16, WelsFillingPred8x2to16 and
|
|
||||||
;* WelsFillingPred1to16 etc.
|
|
||||||
;*
|
|
||||||
;* History
|
|
||||||
;* 09/29/2009 Created
|
|
||||||
;*
|
|
||||||
;*
|
|
||||||
;*************************************************************************/
|
|
||||||
|
|
||||||
%include "asm_inc.asm"
|
|
||||||
|
|
||||||
BITS 32
|
|
||||||
|
|
||||||
;***********************************************************************
|
|
||||||
; Macros and other preprocessor constants
|
|
||||||
;***********************************************************************
|
|
||||||
|
|
||||||
;***********************************************************************
|
|
||||||
; Local Data (Read Only)
|
|
||||||
;***********************************************************************
|
|
||||||
|
|
||||||
;SECTION .rodata pData align=16
|
|
||||||
|
|
||||||
;***********************************************************************
|
|
||||||
; Various memory constants (trigonometric values or rounding values)
|
|
||||||
;***********************************************************************
|
|
||||||
|
|
||||||
;***********************************************************************
|
|
||||||
; Code
|
|
||||||
;***********************************************************************
|
|
||||||
|
|
||||||
|
|
||||||
SECTION .text
|
|
||||||
|
|
||||||
WELS_EXTERN WelsFillingPred8to16_mmx
|
|
||||||
WELS_EXTERN WelsFillingPred8x2to16_mmx
|
|
||||||
WELS_EXTERN WelsFillingPred1to16_mmx
|
|
||||||
WELS_EXTERN WelsFillingPred8x2to16_sse2
|
|
||||||
WELS_EXTERN WelsFillingPred1to16_sse2
|
|
||||||
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
;***********************************************************************----------------
|
|
||||||
; void WelsFillingPred8to16_mmx( uint8_t *pred, uint8_t *v );
|
|
||||||
;***********************************************************************----------------
|
|
||||||
WelsFillingPred8to16_mmx:
|
|
||||||
mov eax, [esp+4] ; pred
|
|
||||||
mov ecx, [esp+8] ; v
|
|
||||||
|
|
||||||
movq mm0, [ecx]
|
|
||||||
movq [eax ], mm0
|
|
||||||
movq [eax+8], mm0
|
|
||||||
|
|
||||||
WELSEMMS
|
|
||||||
ret
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
;***********************************************************************----------------
|
|
||||||
; void WelsFillingPred8x2to16_mmx( uint8_t *pred, uint8_t *v );
|
|
||||||
;***********************************************************************----------------
|
|
||||||
WelsFillingPred8x2to16_mmx:
|
|
||||||
mov eax, [esp+4] ; pred
|
|
||||||
mov ecx, [esp+8] ; v
|
|
||||||
|
|
||||||
movq mm0, [ecx ]
|
|
||||||
movq mm1, [ecx+8]
|
|
||||||
movq [eax ], mm0
|
|
||||||
movq [eax+8], mm1
|
|
||||||
|
|
||||||
WELSEMMS
|
|
||||||
|
|
||||||
ret
|
|
||||||
|
|
||||||
%macro butterfly_1to8_mmx 3 ; mm? for dst, mm? for tmp, one byte for pSrc [generic register name: a/b/c/d]
|
|
||||||
mov %3h, %3l
|
|
||||||
movd %2, e%3x ; i.e, 1% = eax (=b0)
|
|
||||||
pshufw %1, %2, 00h ; b0 b0 b0 b0, b0 b0 b0 b0
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
;***********************************************************************----------------
|
|
||||||
; void WelsFillingPred1to16_mmx( uint8_t *pred, const uint8_t v );
|
|
||||||
;***********************************************************************----------------
|
|
||||||
WelsFillingPred1to16_mmx:
|
|
||||||
mov eax, [esp+4] ; pred
|
|
||||||
|
|
||||||
mov cl, byte [esp+8] ; v
|
|
||||||
butterfly_1to8_mmx mm0, mm1, c ; mm? for dst, mm? for tmp, one byte for pSrc [generic register name: a/b/c/d]
|
|
||||||
|
|
||||||
movq [eax ], mm0
|
|
||||||
movq [eax+8], mm0
|
|
||||||
|
|
||||||
WELSEMMS
|
|
||||||
|
|
||||||
ret
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
;***********************************************************************----------------
|
|
||||||
; void WelsFillingPred8x2to16_sse2( uint8_t *pred, uint8_t *v );
|
|
||||||
;***********************************************************************----------------
|
|
||||||
WelsFillingPred8x2to16_sse2:
|
|
||||||
mov eax, [esp+4] ; pred
|
|
||||||
mov ecx, [esp+8] ; v
|
|
||||||
|
|
||||||
movdqa xmm0, [ecx]
|
|
||||||
movdqa [eax], xmm0
|
|
||||||
|
|
||||||
ret
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
;***********************************************************************----------------
|
|
||||||
; void WelsFillingPred1to16_sse2( uint8_t *pred, const uint8_t v );
|
|
||||||
;***********************************************************************----------------
|
|
||||||
WelsFillingPred1to16_sse2:
|
|
||||||
mov eax, [esp+4] ; pred
|
|
||||||
|
|
||||||
mov cl, byte [esp+8] ; v
|
|
||||||
butterfly_1to16_sse xmm0, xmm1, c ; dst, tmp, pSrc [generic register name: a/b/c/d]
|
|
||||||
|
|
||||||
movdqa [eax], xmm0
|
|
||||||
|
|
||||||
ret
|
|
||||||
@@ -1,687 +0,0 @@
|
|||||||
;*!
|
|
||||||
;* \copy
|
|
||||||
;* Copyright (c) 2009-2013, Cisco Systems
|
|
||||||
;* All rights reserved.
|
|
||||||
;*
|
|
||||||
;* Redistribution and use in source and binary forms, with or without
|
|
||||||
;* modification, are permitted provided that the following conditions
|
|
||||||
;* are met:
|
|
||||||
;*
|
|
||||||
;* * Redistributions of source code must retain the above copyright
|
|
||||||
;* notice, this list of conditions and the following disclaimer.
|
|
||||||
;*
|
|
||||||
;* * Redistributions in binary form must reproduce the above copyright
|
|
||||||
;* notice, this list of conditions and the following disclaimer in
|
|
||||||
;* the documentation and/or other materials provided with the
|
|
||||||
;* distribution.
|
|
||||||
;*
|
|
||||||
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
||||||
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
|
||||||
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
;* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
;*
|
|
||||||
;*
|
|
||||||
;* mb_copy.asm
|
|
||||||
;*
|
|
||||||
;* Abstract
|
|
||||||
;* mb_copy
|
|
||||||
;*
|
|
||||||
;*
|
|
||||||
;*********************************************************************************************/
|
|
||||||
%include "asm_inc.asm"
|
|
||||||
BITS 32
|
|
||||||
|
|
||||||
;***********************************************************************
|
|
||||||
; Macros and other preprocessor constants
|
|
||||||
;***********************************************************************
|
|
||||||
|
|
||||||
;***********************************************************************
|
|
||||||
; Code
|
|
||||||
;***********************************************************************
|
|
||||||
|
|
||||||
SECTION .text
|
|
||||||
|
|
||||||
WELS_EXTERN WelsCopy16x16_sse2
|
|
||||||
WELS_EXTERN WelsCopy16x16NotAligned_sse2
|
|
||||||
WELS_EXTERN WelsCopy8x8_mmx
|
|
||||||
WELS_EXTERN WelsCopy16x8NotAligned_sse2 ;
|
|
||||||
WELS_EXTERN WelsCopy8x16_mmx ;
|
|
||||||
WELS_EXTERN UpdateMbMv_sse2 ;
|
|
||||||
|
|
||||||
;***********************************************************************
|
|
||||||
; void WelsCopy16x16_sse2( uint8_t* Dst,
|
|
||||||
; int32_t iStrideD,
|
|
||||||
; uint8_t* Src,
|
|
||||||
; int32_t iStrideS )
|
|
||||||
;***********************************************************************
|
|
||||||
ALIGN 16
|
|
||||||
WelsCopy16x16_sse2:
|
|
||||||
push esi
|
|
||||||
push edi
|
|
||||||
push ebx
|
|
||||||
|
|
||||||
mov edi, [esp+16] ; Dst
|
|
||||||
mov eax, [esp+20] ; iStrideD
|
|
||||||
mov esi, [esp+24] ; Src
|
|
||||||
mov ecx, [esp+28] ; iStrideS
|
|
||||||
|
|
||||||
lea ebx, [eax+2*eax] ; x3
|
|
||||||
lea edx, [ecx+2*ecx] ; x3
|
|
||||||
|
|
||||||
movdqa xmm0, [esi]
|
|
||||||
movdqa xmm1, [esi+ecx]
|
|
||||||
movdqa xmm2, [esi+2*ecx]
|
|
||||||
movdqa xmm3, [esi+edx]
|
|
||||||
lea esi, [esi+4*ecx]
|
|
||||||
movdqa xmm4, [esi]
|
|
||||||
movdqa xmm5, [esi+ecx]
|
|
||||||
movdqa xmm6, [esi+2*ecx]
|
|
||||||
movdqa xmm7, [esi+edx]
|
|
||||||
lea esi, [esi+4*ecx]
|
|
||||||
|
|
||||||
movdqa [edi], xmm0
|
|
||||||
movdqa [edi+eax], xmm1
|
|
||||||
movdqa [edi+2*eax], xmm2
|
|
||||||
movdqa [edi+ebx], xmm3
|
|
||||||
lea edi, [edi+4*eax]
|
|
||||||
movdqa [edi], xmm4
|
|
||||||
movdqa [edi+eax], xmm5
|
|
||||||
movdqa [edi+2*eax], xmm6
|
|
||||||
movdqa [edi+ebx], xmm7
|
|
||||||
lea edi, [edi+4*eax]
|
|
||||||
|
|
||||||
movdqa xmm0, [esi]
|
|
||||||
movdqa xmm1, [esi+ecx]
|
|
||||||
movdqa xmm2, [esi+2*ecx]
|
|
||||||
movdqa xmm3, [esi+edx]
|
|
||||||
lea esi, [esi+4*ecx]
|
|
||||||
movdqa xmm4, [esi]
|
|
||||||
movdqa xmm5, [esi+ecx]
|
|
||||||
movdqa xmm6, [esi+2*ecx]
|
|
||||||
movdqa xmm7, [esi+edx]
|
|
||||||
|
|
||||||
movdqa [edi], xmm0
|
|
||||||
movdqa [edi+eax], xmm1
|
|
||||||
movdqa [edi+2*eax], xmm2
|
|
||||||
movdqa [edi+ebx], xmm3
|
|
||||||
lea edi, [edi+4*eax]
|
|
||||||
movdqa [edi], xmm4
|
|
||||||
movdqa [edi+eax], xmm5
|
|
||||||
movdqa [edi+2*eax], xmm6
|
|
||||||
movdqa [edi+ebx], xmm7
|
|
||||||
|
|
||||||
pop ebx
|
|
||||||
pop edi
|
|
||||||
pop esi
|
|
||||||
ret
|
|
||||||
|
|
||||||
;***********************************************************************
|
|
||||||
; void WelsCopy16x16NotAligned_sse2( uint8_t* Dst,
|
|
||||||
; int32_t iStrideD,
|
|
||||||
; uint8_t* Src,
|
|
||||||
; int32_t iStrideS )
|
|
||||||
;***********************************************************************
|
|
||||||
ALIGN 16
|
|
||||||
; dst can be align with 16 bytes, but not sure about pSrc, 12/29/2011
|
|
||||||
WelsCopy16x16NotAligned_sse2:
|
|
||||||
push esi
|
|
||||||
push edi
|
|
||||||
push ebx
|
|
||||||
|
|
||||||
mov edi, [esp+16] ; Dst
|
|
||||||
mov eax, [esp+20] ; iStrideD
|
|
||||||
mov esi, [esp+24] ; Src
|
|
||||||
mov ecx, [esp+28] ; iStrideS
|
|
||||||
|
|
||||||
lea ebx, [eax+2*eax] ; x3
|
|
||||||
lea edx, [ecx+2*ecx] ; x3
|
|
||||||
|
|
||||||
movdqu xmm0, [esi]
|
|
||||||
movdqu xmm1, [esi+ecx]
|
|
||||||
movdqu xmm2, [esi+2*ecx]
|
|
||||||
movdqu xmm3, [esi+edx]
|
|
||||||
lea esi, [esi+4*ecx]
|
|
||||||
movdqu xmm4, [esi]
|
|
||||||
movdqu xmm5, [esi+ecx]
|
|
||||||
movdqu xmm6, [esi+2*ecx]
|
|
||||||
movdqu xmm7, [esi+edx]
|
|
||||||
lea esi, [esi+4*ecx]
|
|
||||||
|
|
||||||
movdqa [edi], xmm0
|
|
||||||
movdqa [edi+eax], xmm1
|
|
||||||
movdqa [edi+2*eax], xmm2
|
|
||||||
movdqa [edi+ebx], xmm3
|
|
||||||
lea edi, [edi+4*eax]
|
|
||||||
movdqa [edi], xmm4
|
|
||||||
movdqa [edi+eax], xmm5
|
|
||||||
movdqa [edi+2*eax], xmm6
|
|
||||||
movdqa [edi+ebx], xmm7
|
|
||||||
lea edi, [edi+4*eax]
|
|
||||||
|
|
||||||
movdqu xmm0, [esi]
|
|
||||||
movdqu xmm1, [esi+ecx]
|
|
||||||
movdqu xmm2, [esi+2*ecx]
|
|
||||||
movdqu xmm3, [esi+edx]
|
|
||||||
lea esi, [esi+4*ecx]
|
|
||||||
movdqu xmm4, [esi]
|
|
||||||
movdqu xmm5, [esi+ecx]
|
|
||||||
movdqu xmm6, [esi+2*ecx]
|
|
||||||
movdqu xmm7, [esi+edx]
|
|
||||||
|
|
||||||
movdqa [edi], xmm0
|
|
||||||
movdqa [edi+eax], xmm1
|
|
||||||
movdqa [edi+2*eax], xmm2
|
|
||||||
movdqa [edi+ebx], xmm3
|
|
||||||
lea edi, [edi+4*eax]
|
|
||||||
movdqa [edi], xmm4
|
|
||||||
movdqa [edi+eax], xmm5
|
|
||||||
movdqa [edi+2*eax], xmm6
|
|
||||||
movdqa [edi+ebx], xmm7
|
|
||||||
|
|
||||||
pop ebx
|
|
||||||
pop edi
|
|
||||||
pop esi
|
|
||||||
ret
|
|
||||||
|
|
||||||
; , 12/29/2011
|
|
||||||
;***********************************************************************
|
|
||||||
; void WelsCopy16x8NotAligned_sse2(uint8_t* Dst,
|
|
||||||
; int32_t iStrideD,
|
|
||||||
; uint8_t* Src,
|
|
||||||
; int32_t iStrideS )
|
|
||||||
;***********************************************************************
|
|
||||||
ALIGN 16
|
|
||||||
WelsCopy16x8NotAligned_sse2:
|
|
||||||
push esi
|
|
||||||
push edi
|
|
||||||
push ebx
|
|
||||||
|
|
||||||
mov edi, [esp+16] ; Dst
|
|
||||||
mov eax, [esp+20] ; iStrideD
|
|
||||||
mov esi, [esp+24] ; Src
|
|
||||||
mov ecx, [esp+28] ; iStrideS
|
|
||||||
|
|
||||||
lea ebx, [eax+2*eax] ; x3
|
|
||||||
lea edx, [ecx+2*ecx] ; x3
|
|
||||||
|
|
||||||
movdqu xmm0, [esi]
|
|
||||||
movdqu xmm1, [esi+ecx]
|
|
||||||
movdqu xmm2, [esi+2*ecx]
|
|
||||||
movdqu xmm3, [esi+edx]
|
|
||||||
lea esi, [esi+4*ecx]
|
|
||||||
movdqu xmm4, [esi]
|
|
||||||
movdqu xmm5, [esi+ecx]
|
|
||||||
movdqu xmm6, [esi+2*ecx]
|
|
||||||
movdqu xmm7, [esi+edx]
|
|
||||||
|
|
||||||
movdqa [edi], xmm0
|
|
||||||
movdqa [edi+eax], xmm1
|
|
||||||
movdqa [edi+2*eax], xmm2
|
|
||||||
movdqa [edi+ebx], xmm3
|
|
||||||
lea edi, [edi+4*eax]
|
|
||||||
movdqa [edi], xmm4
|
|
||||||
movdqa [edi+eax], xmm5
|
|
||||||
movdqa [edi+2*eax], xmm6
|
|
||||||
movdqa [edi+ebx], xmm7
|
|
||||||
|
|
||||||
pop ebx
|
|
||||||
pop edi
|
|
||||||
pop esi
|
|
||||||
ret
|
|
||||||
|
|
||||||
|
|
||||||
;***********************************************************************
|
|
||||||
; void WelsCopy8x16_mmx(uint8_t* Dst,
|
|
||||||
; int32_t iStrideD,
|
|
||||||
; uint8_t* Src,
|
|
||||||
; int32_t iStrideS )
|
|
||||||
;***********************************************************************
|
|
||||||
ALIGN 16
|
|
||||||
WelsCopy8x16_mmx:
|
|
||||||
push ebx
|
|
||||||
|
|
||||||
mov eax, [esp + 8 ] ;Dst
|
|
||||||
mov ecx, [esp + 12] ;iStrideD
|
|
||||||
mov ebx, [esp + 16] ;Src
|
|
||||||
mov edx, [esp + 20] ;iStrideS
|
|
||||||
|
|
||||||
movq mm0, [ebx]
|
|
||||||
movq mm1, [ebx+edx]
|
|
||||||
lea ebx, [ebx+2*edx]
|
|
||||||
movq mm2, [ebx]
|
|
||||||
movq mm3, [ebx+edx]
|
|
||||||
lea ebx, [ebx+2*edx]
|
|
||||||
movq mm4, [ebx]
|
|
||||||
movq mm5, [ebx+edx]
|
|
||||||
lea ebx, [ebx+2*edx]
|
|
||||||
movq mm6, [ebx]
|
|
||||||
movq mm7, [ebx+edx]
|
|
||||||
lea ebx, [ebx+2*edx]
|
|
||||||
|
|
||||||
movq [eax], mm0
|
|
||||||
movq [eax+ecx], mm1
|
|
||||||
lea eax, [eax+2*ecx]
|
|
||||||
movq [eax], mm2
|
|
||||||
movq [eax+ecx], mm3
|
|
||||||
lea eax, [eax+2*ecx]
|
|
||||||
movq [eax], mm4
|
|
||||||
movq [eax+ecx], mm5
|
|
||||||
lea eax, [eax+2*ecx]
|
|
||||||
movq [eax], mm6
|
|
||||||
movq [eax+ecx], mm7
|
|
||||||
lea eax, [eax+2*ecx]
|
|
||||||
|
|
||||||
movq mm0, [ebx]
|
|
||||||
movq mm1, [ebx+edx]
|
|
||||||
lea ebx, [ebx+2*edx]
|
|
||||||
movq mm2, [ebx]
|
|
||||||
movq mm3, [ebx+edx]
|
|
||||||
lea ebx, [ebx+2*edx]
|
|
||||||
movq mm4, [ebx]
|
|
||||||
movq mm5, [ebx+edx]
|
|
||||||
lea ebx, [ebx+2*edx]
|
|
||||||
movq mm6, [ebx]
|
|
||||||
movq mm7, [ebx+edx]
|
|
||||||
|
|
||||||
movq [eax], mm0
|
|
||||||
movq [eax+ecx], mm1
|
|
||||||
lea eax, [eax+2*ecx]
|
|
||||||
movq [eax], mm2
|
|
||||||
movq [eax+ecx], mm3
|
|
||||||
lea eax, [eax+2*ecx]
|
|
||||||
movq [eax], mm4
|
|
||||||
movq [eax+ecx], mm5
|
|
||||||
lea eax, [eax+2*ecx]
|
|
||||||
movq [eax], mm6
|
|
||||||
movq [eax+ecx], mm7
|
|
||||||
|
|
||||||
WELSEMMS
|
|
||||||
pop ebx
|
|
||||||
ret
|
|
||||||
|
|
||||||
;***********************************************************************
|
|
||||||
; void WelsCopy8x8_mmx( uint8_t* Dst,
|
|
||||||
; int32_t iStrideD,
|
|
||||||
; uint8_t* Src,
|
|
||||||
; int32_t iStrideS )
|
|
||||||
;***********************************************************************
|
|
||||||
ALIGN 16
|
|
||||||
WelsCopy8x8_mmx:
|
|
||||||
push ebx
|
|
||||||
push esi
|
|
||||||
mov eax, [esp + 12] ;Dst
|
|
||||||
mov ecx, [esp + 16] ;iStrideD
|
|
||||||
mov esi, [esp + 20] ;Src
|
|
||||||
mov ebx, [esp + 24] ;iStrideS
|
|
||||||
lea edx, [ebx+2*ebx]
|
|
||||||
|
|
||||||
; to prefetch next loop
|
|
||||||
prefetchnta [esi+2*ebx]
|
|
||||||
prefetchnta [esi+edx]
|
|
||||||
movq mm0, [esi]
|
|
||||||
movq mm1, [esi+ebx]
|
|
||||||
lea esi, [esi+2*ebx]
|
|
||||||
; to prefetch next loop
|
|
||||||
prefetchnta [esi+2*ebx]
|
|
||||||
prefetchnta [esi+edx]
|
|
||||||
movq mm2, [esi]
|
|
||||||
movq mm3, [esi+ebx]
|
|
||||||
lea esi, [esi+2*ebx]
|
|
||||||
; to prefetch next loop
|
|
||||||
prefetchnta [esi+2*ebx]
|
|
||||||
prefetchnta [esi+edx]
|
|
||||||
movq mm4, [esi]
|
|
||||||
movq mm5, [esi+ebx]
|
|
||||||
lea esi, [esi+2*ebx]
|
|
||||||
movq mm6, [esi]
|
|
||||||
movq mm7, [esi+ebx]
|
|
||||||
|
|
||||||
movq [eax], mm0
|
|
||||||
movq [eax+ecx], mm1
|
|
||||||
lea eax, [eax+2*ecx]
|
|
||||||
movq [eax], mm2
|
|
||||||
movq [eax+ecx], mm3
|
|
||||||
lea eax, [eax+2*ecx]
|
|
||||||
movq [eax], mm4
|
|
||||||
movq [eax+ecx], mm5
|
|
||||||
lea eax, [eax+2*ecx]
|
|
||||||
movq [eax], mm6
|
|
||||||
movq [eax+ecx], mm7
|
|
||||||
|
|
||||||
WELSEMMS
|
|
||||||
pop esi
|
|
||||||
pop ebx
|
|
||||||
ret
|
|
||||||
|
|
||||||
; (dunhuang@cisco), 12/21/2011
|
|
||||||
;***********************************************************************
|
|
||||||
; void UpdateMbMv_sse2( SMVUnitXY *pMvBuffer, const SMVUnitXY sMv )
|
|
||||||
;***********************************************************************
|
|
||||||
ALIGN 16
|
|
||||||
UpdateMbMv_sse2:
|
|
||||||
mov eax, [esp+4] ; mv_buffer
|
|
||||||
movd xmm0, [esp+8] ; _mv
|
|
||||||
pshufd xmm1, xmm0, $0
|
|
||||||
movdqa [eax ], xmm1
|
|
||||||
movdqa [eax+0x10], xmm1
|
|
||||||
movdqa [eax+0x20], xmm1
|
|
||||||
movdqa [eax+0x30], xmm1
|
|
||||||
ret
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
;***********************************************************************
|
|
||||||
; Macros and other preprocessor constants
|
|
||||||
;***********************************************************************
|
|
||||||
|
|
||||||
;***********************************************************************
|
|
||||||
; Local Data (Read Only)
|
|
||||||
;***********************************************************************
|
|
||||||
|
|
||||||
;SECTION .rodata pData align=16
|
|
||||||
|
|
||||||
;***********************************************************************
|
|
||||||
; Various memory constants (trigonometric values or rounding values)
|
|
||||||
;***********************************************************************
|
|
||||||
;read unaligned memory
|
|
||||||
%macro SSE2_READ_UNA 2
|
|
||||||
movq %1, [%2]
|
|
||||||
movhps %1, [%2+8]
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
;write unaligned memory
|
|
||||||
%macro SSE2_WRITE_UNA 2
|
|
||||||
movq [%1], %2
|
|
||||||
movhps [%1+8], %2
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
|
|
||||||
;***********************************************************************
|
|
||||||
; Code
|
|
||||||
;***********************************************************************
|
|
||||||
|
|
||||||
SECTION .text
|
|
||||||
|
|
||||||
WELS_EXTERN PixelAvgWidthEq8_mmx
|
|
||||||
WELS_EXTERN PixelAvgWidthEq16_sse2
|
|
||||||
|
|
||||||
WELS_EXTERN McCopyWidthEq4_mmx
|
|
||||||
WELS_EXTERN McCopyWidthEq8_mmx
|
|
||||||
WELS_EXTERN McCopyWidthEq16_sse2
|
|
||||||
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
;***********************************************************************
|
|
||||||
; void PixelAvgWidthEq8_mmx( uint8_t *dst, int32_t iDstStride,
|
|
||||||
; uint8_t *pSrc1, int32_t iSrc1Stride,
|
|
||||||
; uint8_t *pSrc2, int32_t iSrc2Stride,
|
|
||||||
; int32_t iHeight );
|
|
||||||
;***********************************************************************
|
|
||||||
PixelAvgWidthEq8_mmx:
|
|
||||||
push ebp
|
|
||||||
push ebx
|
|
||||||
push esi
|
|
||||||
push edi
|
|
||||||
|
|
||||||
mov edi, [esp+20]
|
|
||||||
mov esi, [esp+28]
|
|
||||||
mov edx, [esp+36]
|
|
||||||
mov ebp, [esp+24]
|
|
||||||
mov eax, [esp+32]
|
|
||||||
mov ebx, [esp+40]
|
|
||||||
mov ecx, [esp+44]
|
|
||||||
sar ecx, 2
|
|
||||||
.height_loop:
|
|
||||||
movq mm0, [esi]
|
|
||||||
pavgb mm0, [edx]
|
|
||||||
movq [edi], mm0
|
|
||||||
movq mm1, [esi+eax]
|
|
||||||
pavgb mm1, [edx+ebx]
|
|
||||||
movq [edi+ebp], mm1
|
|
||||||
lea edi, [edi+2*ebp]
|
|
||||||
lea esi, [esi+2*eax]
|
|
||||||
lea edx, [edx+2*ebx]
|
|
||||||
|
|
||||||
movq mm2, [esi]
|
|
||||||
pavgb mm2, [edx]
|
|
||||||
movq [edi], mm2
|
|
||||||
movq mm3, [esi+eax]
|
|
||||||
pavgb mm3, [edx+ebx]
|
|
||||||
movq [edi+ebp], mm3
|
|
||||||
lea edi, [edi+2*ebp]
|
|
||||||
lea esi, [esi+2*eax]
|
|
||||||
lea edx, [edx+2*ebx]
|
|
||||||
|
|
||||||
dec ecx
|
|
||||||
jne .height_loop
|
|
||||||
|
|
||||||
WELSEMMS
|
|
||||||
pop edi
|
|
||||||
pop esi
|
|
||||||
pop ebx
|
|
||||||
pop ebp
|
|
||||||
ret
|
|
||||||
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
;***********************************************************************
|
|
||||||
; void PixelAvgWidthEq16_sse2( uint8_t *dst, int32_t iDstStride,
|
|
||||||
; uint8_t *pSrc1, int32_t iSrc1Stride,
|
|
||||||
; uint8_t *pSrc2, int32_t iSrc2Stride,
|
|
||||||
; int32_t iHeight );
|
|
||||||
;***********************************************************************
|
|
||||||
PixelAvgWidthEq16_sse2:
|
|
||||||
push ebp
|
|
||||||
push ebx
|
|
||||||
push esi
|
|
||||||
push edi
|
|
||||||
|
|
||||||
mov edi, [esp+20]
|
|
||||||
mov esi, [esp+28]
|
|
||||||
mov edx, [esp+36]
|
|
||||||
mov ebp, [esp+24]
|
|
||||||
mov eax, [esp+32]
|
|
||||||
mov ebx, [esp+40]
|
|
||||||
mov ecx, [esp+44]
|
|
||||||
sar ecx, 2
|
|
||||||
.height_loop:
|
|
||||||
movdqu xmm0, [esi]
|
|
||||||
movdqu xmm1, [edx]
|
|
||||||
movdqu xmm2, [esi+eax]
|
|
||||||
movdqu xmm3, [edx+ebx]
|
|
||||||
pavgb xmm0, xmm1
|
|
||||||
pavgb xmm2, xmm3
|
|
||||||
movdqu [edi], xmm0
|
|
||||||
movdqu [edi+ebp], xmm2
|
|
||||||
lea edi, [edi+2*ebp]
|
|
||||||
lea esi, [esi+2*eax]
|
|
||||||
lea edx, [edx+2*ebx]
|
|
||||||
|
|
||||||
movdqu xmm4, [esi]
|
|
||||||
movdqu xmm5, [edx]
|
|
||||||
movdqu xmm6, [esi+eax]
|
|
||||||
movdqu xmm7, [edx+ebx]
|
|
||||||
pavgb xmm4, xmm5
|
|
||||||
pavgb xmm6, xmm7
|
|
||||||
movdqu [edi], xmm4
|
|
||||||
movdqu [edi+ebp], xmm6
|
|
||||||
lea edi, [edi+2*ebp]
|
|
||||||
lea esi, [esi+2*eax]
|
|
||||||
lea edx, [edx+2*ebx]
|
|
||||||
|
|
||||||
dec ecx
|
|
||||||
jne .height_loop
|
|
||||||
|
|
||||||
pop edi
|
|
||||||
pop esi
|
|
||||||
pop ebx
|
|
||||||
pop ebp
|
|
||||||
ret
|
|
||||||
|
|
||||||
|
|
||||||
ALIGN 64
|
|
||||||
avg_w16_align_0_ssse3:
|
|
||||||
movdqa xmm1, [ebx]
|
|
||||||
movdqu xmm2, [ecx]
|
|
||||||
pavgb xmm1, xmm2
|
|
||||||
movdqa [edi], xmm1
|
|
||||||
add ebx, eax
|
|
||||||
add ecx, ebp
|
|
||||||
add edi, esi
|
|
||||||
dec dword [esp+4]
|
|
||||||
jg avg_w16_align_0_ssse3
|
|
||||||
ret
|
|
||||||
|
|
||||||
ALIGN 64
|
|
||||||
avg_w16_align_1_ssse3:
|
|
||||||
movdqa xmm1, [ebx+16]
|
|
||||||
movdqu xmm2, [ecx]
|
|
||||||
palignr xmm1, [ebx], 1
|
|
||||||
pavgb xmm1, xmm2
|
|
||||||
movdqa [edi], xmm1
|
|
||||||
add ebx, eax
|
|
||||||
add ecx, ebp
|
|
||||||
add edi, esi
|
|
||||||
dec dword [esp+4]
|
|
||||||
jg avg_w16_align_1_ssse3
|
|
||||||
ret
|
|
||||||
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
;***********************************************************************
|
|
||||||
; void PixelAvgWidthEq16_ssse3(uint8_t *pDst, int32_t iDstStride,
|
|
||||||
; uint8_t *pSrc1, int32_t iSrc1Stride,
|
|
||||||
; uint8_t *pSrc2, int32_t iSrc2Stride,
|
|
||||||
; int32_t iHeight );
|
|
||||||
;***********************************************************************
|
|
||||||
WELS_EXTERN PixelAvgWidthEq16_ssse3
|
|
||||||
PixelAvgWidthEq16_ssse3:
|
|
||||||
push ebp
|
|
||||||
push ebx
|
|
||||||
push esi
|
|
||||||
push edi
|
|
||||||
|
|
||||||
mov edi, [esp+20] ; dst
|
|
||||||
mov ebx, [esp+28] ; src1
|
|
||||||
mov ecx, [esp+36] ; src2
|
|
||||||
mov esi, [esp+24] ; i_dst_stride
|
|
||||||
|
|
||||||
%define avg_w16_offset (avg_w16_align_1_ssse3-avg_w16_align_0_ssse3)
|
|
||||||
mov edx, ebx
|
|
||||||
and edx, 0x01
|
|
||||||
lea eax, [avg_w16_align_0_ssse3]
|
|
||||||
lea ebp, [avg_w16_offset]
|
|
||||||
imul ebp, edx
|
|
||||||
lea edx, [ebp+eax]
|
|
||||||
|
|
||||||
mov eax, [esp+32]
|
|
||||||
mov ebp, [esp+44]
|
|
||||||
push ebp
|
|
||||||
mov ebp, [esp+44]
|
|
||||||
and ebx, 0xfffffff0
|
|
||||||
call edx
|
|
||||||
pop ebp
|
|
||||||
pop edi
|
|
||||||
pop esi
|
|
||||||
pop ebx
|
|
||||||
pop ebp
|
|
||||||
ret
|
|
||||||
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
;*******************************************************************************
|
|
||||||
; void McCopyWidthEq4_mmx( uint8_t *pSrc, int32_t iSrcStride,
|
|
||||||
; uint8_t *pDst, int32_t iDstStride, int32_t iHeight )
|
|
||||||
;*******************************************************************************
|
|
||||||
McCopyWidthEq4_mmx:
|
|
||||||
push esi
|
|
||||||
push edi
|
|
||||||
push ebx
|
|
||||||
|
|
||||||
|
|
||||||
mov esi, [esp+16]
|
|
||||||
mov eax, [esp+20]
|
|
||||||
mov edi, [esp+24]
|
|
||||||
mov ecx, [esp+28]
|
|
||||||
mov edx, [esp+32]
|
|
||||||
ALIGN 4
|
|
||||||
.height_loop:
|
|
||||||
mov ebx, [esi]
|
|
||||||
mov [edi], ebx
|
|
||||||
|
|
||||||
add esi, eax
|
|
||||||
add edi, ecx
|
|
||||||
dec edx
|
|
||||||
jnz .height_loop
|
|
||||||
WELSEMMS
|
|
||||||
pop ebx
|
|
||||||
pop edi
|
|
||||||
pop esi
|
|
||||||
ret
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
;*******************************************************************************
|
|
||||||
; void McCopyWidthEq8_mmx( uint8_t *pSrc, int32_t iSrcStride,
|
|
||||||
; uint8_t *pDst, int32_t iDstStride, int32_t iHeight )
|
|
||||||
;*******************************************************************************
|
|
||||||
McCopyWidthEq8_mmx:
|
|
||||||
push esi
|
|
||||||
push edi
|
|
||||||
mov esi, [esp+12]
|
|
||||||
mov eax, [esp+16]
|
|
||||||
mov edi, [esp+20]
|
|
||||||
mov ecx, [esp+24]
|
|
||||||
mov edx, [esp+28]
|
|
||||||
|
|
||||||
ALIGN 4
|
|
||||||
.height_loop:
|
|
||||||
movq mm0, [esi]
|
|
||||||
movq [edi], mm0
|
|
||||||
add esi, eax
|
|
||||||
add edi, ecx
|
|
||||||
dec edx
|
|
||||||
jnz .height_loop
|
|
||||||
|
|
||||||
WELSEMMS
|
|
||||||
pop edi
|
|
||||||
pop esi
|
|
||||||
ret
|
|
||||||
|
|
||||||
ALIGN 16
|
|
||||||
;***********************************************************************
|
|
||||||
; void McCopyWidthEq16_sse2( uint8_t *pSrc, int32_t iSrcStride, uint8_t *pDst, int32_t iDstStride, int32_t iHeight )
|
|
||||||
;***********************************************************************
|
|
||||||
McCopyWidthEq16_sse2:
|
|
||||||
push esi
|
|
||||||
push edi
|
|
||||||
|
|
||||||
mov esi, [esp+12]
|
|
||||||
mov eax, [esp+16]
|
|
||||||
mov edi, [esp+20]
|
|
||||||
mov edx, [esp+24]
|
|
||||||
mov ecx, [esp+28]
|
|
||||||
|
|
||||||
ALIGN 4
|
|
||||||
.height_loop:
|
|
||||||
SSE2_READ_UNA xmm0, esi
|
|
||||||
SSE2_READ_UNA xmm1, esi+eax
|
|
||||||
SSE2_WRITE_UNA edi, xmm0
|
|
||||||
SSE2_WRITE_UNA edi+edx, xmm1
|
|
||||||
|
|
||||||
sub ecx, 2
|
|
||||||
lea esi, [esi+eax*2]
|
|
||||||
lea edi, [edi+edx*2]
|
|
||||||
jnz .height_loop
|
|
||||||
|
|
||||||
pop edi
|
|
||||||
pop esi
|
|
||||||
ret
|
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -40,8 +40,6 @@
|
|||||||
;*
|
;*
|
||||||
;*************************************************************************/
|
;*************************************************************************/
|
||||||
|
|
||||||
BITS 32
|
|
||||||
|
|
||||||
%include "asm_inc.asm"
|
%include "asm_inc.asm"
|
||||||
;***********************************************************************
|
;***********************************************************************
|
||||||
; Code
|
; Code
|
||||||
@@ -55,8 +53,10 @@ ALIGN 16
|
|||||||
;***********************************************************************
|
;***********************************************************************
|
||||||
WELS_EXTERN WelsPrefetchZero_mmx
|
WELS_EXTERN WelsPrefetchZero_mmx
|
||||||
WelsPrefetchZero_mmx:
|
WelsPrefetchZero_mmx:
|
||||||
mov eax,[esp+4]
|
%assign push_num 0
|
||||||
prefetchnta [eax]
|
LOAD_1_PARA
|
||||||
|
;mov eax,[esp+4]
|
||||||
|
prefetchnta [r0]
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
|
||||||
@@ -66,19 +66,21 @@ ALIGN 16
|
|||||||
;***********************************************************************
|
;***********************************************************************
|
||||||
WELS_EXTERN WelsSetMemZeroAligned64_sse2
|
WELS_EXTERN WelsSetMemZeroAligned64_sse2
|
||||||
WelsSetMemZeroAligned64_sse2:
|
WelsSetMemZeroAligned64_sse2:
|
||||||
mov eax, [esp + 4] ; dst
|
|
||||||
mov ecx, [esp + 8]
|
%assign push_num 0
|
||||||
neg ecx
|
LOAD_2_PARA
|
||||||
|
SIGN_EXTENTION r1, r1d
|
||||||
|
neg r1
|
||||||
|
|
||||||
pxor xmm0, xmm0
|
pxor xmm0, xmm0
|
||||||
.memzeroa64_sse2_loops:
|
.memzeroa64_sse2_loops:
|
||||||
movdqa [eax], xmm0
|
movdqa [r0], xmm0
|
||||||
movdqa [eax+16], xmm0
|
movdqa [r0+16], xmm0
|
||||||
movdqa [eax+32], xmm0
|
movdqa [r0+32], xmm0
|
||||||
movdqa [eax+48], xmm0
|
movdqa [r0+48], xmm0
|
||||||
add eax, 0x40
|
add r0, 0x40
|
||||||
|
|
||||||
add ecx, 0x40
|
add r1, 0x40
|
||||||
jnz near .memzeroa64_sse2_loops
|
jnz near .memzeroa64_sse2_loops
|
||||||
|
|
||||||
ret
|
ret
|
||||||
@@ -89,23 +91,25 @@ ALIGN 16
|
|||||||
;***********************************************************************
|
;***********************************************************************
|
||||||
WELS_EXTERN WelsSetMemZeroSize64_mmx
|
WELS_EXTERN WelsSetMemZeroSize64_mmx
|
||||||
WelsSetMemZeroSize64_mmx:
|
WelsSetMemZeroSize64_mmx:
|
||||||
mov eax, [esp + 4] ; dst
|
|
||||||
mov ecx, [esp + 8]
|
%assign push_num 0
|
||||||
neg ecx
|
LOAD_2_PARA
|
||||||
|
SIGN_EXTENTION r1, r1d
|
||||||
|
neg r1
|
||||||
|
|
||||||
pxor mm0, mm0
|
pxor mm0, mm0
|
||||||
.memzero64_mmx_loops:
|
.memzero64_mmx_loops:
|
||||||
movq [eax], mm0
|
movq [r0], mm0
|
||||||
movq [eax+8], mm0
|
movq [r0+8], mm0
|
||||||
movq [eax+16], mm0
|
movq [r0+16], mm0
|
||||||
movq [eax+24], mm0
|
movq [r0+24], mm0
|
||||||
movq [eax+32], mm0
|
movq [r0+32], mm0
|
||||||
movq [eax+40], mm0
|
movq [r0+40], mm0
|
||||||
movq [eax+48], mm0
|
movq [r0+48], mm0
|
||||||
movq [eax+56], mm0
|
movq [r0+56], mm0
|
||||||
add eax, 0x40
|
add r0, 0x40
|
||||||
|
|
||||||
add ecx, 0x40
|
add r1, 0x40
|
||||||
jnz near .memzero64_mmx_loops
|
jnz near .memzero64_mmx_loops
|
||||||
|
|
||||||
WELSEMMS
|
WELSEMMS
|
||||||
@@ -117,16 +121,18 @@ ALIGN 16
|
|||||||
;***********************************************************************
|
;***********************************************************************
|
||||||
WELS_EXTERN WelsSetMemZeroSize8_mmx
|
WELS_EXTERN WelsSetMemZeroSize8_mmx
|
||||||
WelsSetMemZeroSize8_mmx:
|
WelsSetMemZeroSize8_mmx:
|
||||||
mov eax, [esp + 4] ; dst
|
|
||||||
mov ecx, [esp + 8] ; size
|
%assign push_num 0
|
||||||
neg ecx
|
LOAD_2_PARA
|
||||||
|
SIGN_EXTENTION r1, r1d
|
||||||
|
neg r1
|
||||||
pxor mm0, mm0
|
pxor mm0, mm0
|
||||||
|
|
||||||
.memzero8_mmx_loops:
|
.memzero8_mmx_loops:
|
||||||
movq [eax], mm0
|
movq [r0], mm0
|
||||||
add eax, 0x08
|
add r0, 0x08
|
||||||
|
|
||||||
add ecx, 0x08
|
add r1, 0x08
|
||||||
jnz near .memzero8_mmx_loops
|
jnz near .memzero8_mmx_loops
|
||||||
|
|
||||||
WELSEMMS
|
WELSEMMS
|
||||||
|
|||||||
@@ -42,7 +42,6 @@
|
|||||||
|
|
||||||
%include "asm_inc.asm"
|
%include "asm_inc.asm"
|
||||||
|
|
||||||
BITS 32
|
|
||||||
|
|
||||||
SECTION .text
|
SECTION .text
|
||||||
;************************************************
|
;************************************************
|
||||||
@@ -86,14 +85,16 @@ SECTION .text
|
|||||||
WELS_EXTERN WelsQuant4x4_sse2
|
WELS_EXTERN WelsQuant4x4_sse2
|
||||||
align 16
|
align 16
|
||||||
WelsQuant4x4_sse2:
|
WelsQuant4x4_sse2:
|
||||||
mov eax, [ff]
|
%assign push_num 0
|
||||||
mov ecx, [mf]
|
LOAD_3_PARA
|
||||||
MOVDQ xmm2, [eax]
|
;mov eax, [ff]
|
||||||
MOVDQ xmm3, [ecx]
|
;mov ecx, [mf]
|
||||||
|
movdqa xmm2, [r1]
|
||||||
|
movdqa xmm3, [r2]
|
||||||
|
|
||||||
mov edx, [pDct]
|
;mov edx, [pDct]
|
||||||
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [edx]
|
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0]
|
||||||
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [edx + 0x10]
|
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x10]
|
||||||
|
|
||||||
ret
|
ret
|
||||||
|
|
||||||
@@ -103,15 +104,21 @@ WelsQuant4x4_sse2:
|
|||||||
WELS_EXTERN WelsQuant4x4Dc_sse2
|
WELS_EXTERN WelsQuant4x4Dc_sse2
|
||||||
align 16
|
align 16
|
||||||
WelsQuant4x4Dc_sse2:
|
WelsQuant4x4Dc_sse2:
|
||||||
mov ax, [mf]
|
%assign push_num 0
|
||||||
SSE2_Copy8Times xmm3, eax
|
LOAD_3_PARA
|
||||||
|
%ifndef X86_32
|
||||||
|
movsx r1, r1w
|
||||||
|
movsx r2, r2w
|
||||||
|
%endif
|
||||||
|
;mov ax, [mf]
|
||||||
|
SSE2_Copy8Times xmm3, r2d
|
||||||
|
|
||||||
mov cx, [ff]
|
;mov cx, [ff]
|
||||||
SSE2_Copy8Times xmm2, ecx
|
SSE2_Copy8Times xmm2, r1d
|
||||||
|
|
||||||
mov edx, [pDct]
|
;mov edx, [pDct]
|
||||||
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [edx]
|
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0]
|
||||||
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [edx + 0x10]
|
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x10]
|
||||||
|
|
||||||
ret
|
ret
|
||||||
|
|
||||||
@@ -121,20 +128,22 @@ WelsQuant4x4Dc_sse2:
|
|||||||
WELS_EXTERN WelsQuantFour4x4_sse2
|
WELS_EXTERN WelsQuantFour4x4_sse2
|
||||||
align 16
|
align 16
|
||||||
WelsQuantFour4x4_sse2:
|
WelsQuantFour4x4_sse2:
|
||||||
mov eax, [ff]
|
%assign push_num 0
|
||||||
mov ecx, [mf]
|
LOAD_3_PARA
|
||||||
MOVDQ xmm2, [eax]
|
;mov eax, [ff]
|
||||||
MOVDQ xmm3, [ecx]
|
;mov ecx, [mf]
|
||||||
|
MOVDQ xmm2, [r1]
|
||||||
|
MOVDQ xmm3, [r2]
|
||||||
|
|
||||||
mov edx, [pDct]
|
;mov edx, [pDct]
|
||||||
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [edx]
|
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0]
|
||||||
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [edx + 0x10]
|
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x10]
|
||||||
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [edx + 0x20]
|
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x20]
|
||||||
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [edx + 0x30]
|
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x30]
|
||||||
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [edx + 0x40]
|
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x40]
|
||||||
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [edx + 0x50]
|
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x50]
|
||||||
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [edx + 0x60]
|
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x60]
|
||||||
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [edx + 0x70]
|
SSE2_Quant8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x70]
|
||||||
|
|
||||||
ret
|
ret
|
||||||
|
|
||||||
@@ -144,24 +153,26 @@ WelsQuantFour4x4_sse2:
|
|||||||
WELS_EXTERN WelsQuantFour4x4Max_sse2
|
WELS_EXTERN WelsQuantFour4x4Max_sse2
|
||||||
align 16
|
align 16
|
||||||
WelsQuantFour4x4Max_sse2:
|
WelsQuantFour4x4Max_sse2:
|
||||||
mov eax, [ff]
|
%assign push_num 0
|
||||||
mov ecx, [mf]
|
LOAD_4_PARA
|
||||||
MOVDQ xmm2, [eax]
|
;mov eax, [ff]
|
||||||
MOVDQ xmm3, [ecx]
|
;mov ecx, [mf]
|
||||||
|
MOVDQ xmm2, [r1]
|
||||||
|
MOVDQ xmm3, [r2]
|
||||||
|
|
||||||
mov edx, [pDct]
|
;mov edx, [pDct]
|
||||||
pxor xmm4, xmm4
|
pxor xmm4, xmm4
|
||||||
pxor xmm5, xmm5
|
pxor xmm5, xmm5
|
||||||
pxor xmm6, xmm6
|
pxor xmm6, xmm6
|
||||||
pxor xmm7, xmm7
|
pxor xmm7, xmm7
|
||||||
SSE2_QuantMax8 xmm0, xmm1, xmm2, xmm3, [edx ], xmm4
|
SSE2_QuantMax8 xmm0, xmm1, xmm2, xmm3, [r0 ], xmm4
|
||||||
SSE2_QuantMax8 xmm0, xmm1, xmm2, xmm3, [edx + 0x10], xmm4
|
SSE2_QuantMax8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x10], xmm4
|
||||||
SSE2_QuantMax8 xmm0, xmm1, xmm2, xmm3, [edx + 0x20], xmm5
|
SSE2_QuantMax8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x20], xmm5
|
||||||
SSE2_QuantMax8 xmm0, xmm1, xmm2, xmm3, [edx + 0x30], xmm5
|
SSE2_QuantMax8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x30], xmm5
|
||||||
SSE2_QuantMax8 xmm0, xmm1, xmm2, xmm3, [edx + 0x40], xmm6
|
SSE2_QuantMax8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x40], xmm6
|
||||||
SSE2_QuantMax8 xmm0, xmm1, xmm2, xmm3, [edx + 0x50], xmm6
|
SSE2_QuantMax8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x50], xmm6
|
||||||
SSE2_QuantMax8 xmm0, xmm1, xmm2, xmm3, [edx + 0x60], xmm7
|
SSE2_QuantMax8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x60], xmm7
|
||||||
SSE2_QuantMax8 xmm0, xmm1, xmm2, xmm3, [edx + 0x70], xmm7
|
SSE2_QuantMax8 xmm0, xmm1, xmm2, xmm3, [r0 + 0x70], xmm7
|
||||||
|
|
||||||
SSE2_TransTwo4x4W xmm4, xmm5, xmm6, xmm7, xmm0
|
SSE2_TransTwo4x4W xmm4, xmm5, xmm6, xmm7, xmm0
|
||||||
pmaxsw xmm0, xmm4
|
pmaxsw xmm0, xmm4
|
||||||
@@ -171,9 +182,9 @@ WelsQuantFour4x4Max_sse2:
|
|||||||
punpckhqdq xmm0, xmm1
|
punpckhqdq xmm0, xmm1
|
||||||
pmaxsw xmm0, xmm1
|
pmaxsw xmm0, xmm1
|
||||||
|
|
||||||
mov edx, [max]
|
;mov r0, [r3]
|
||||||
movq [edx], xmm0
|
movq [r3], xmm0
|
||||||
|
LOAD_4_PARA_POP
|
||||||
ret
|
ret
|
||||||
|
|
||||||
%macro MMX_Copy4Times 2
|
%macro MMX_Copy4Times 2
|
||||||
@@ -203,21 +214,20 @@ SECTION .text
|
|||||||
WELS_EXTERN WelsHadamardQuant2x2_mmx
|
WELS_EXTERN WelsHadamardQuant2x2_mmx
|
||||||
align 16
|
align 16
|
||||||
WelsHadamardQuant2x2_mmx:
|
WelsHadamardQuant2x2_mmx:
|
||||||
|
%assign push_num 0
|
||||||
mov eax, [pDct]
|
LOAD_5_PARA
|
||||||
movd mm0, [eax]
|
%ifndef X86_32
|
||||||
movd mm1, [eax + 0x20]
|
movsx r1, r1w
|
||||||
|
movsx r2, r2w
|
||||||
|
%endif
|
||||||
|
;mov eax, [pDct]
|
||||||
|
movd mm0, [r0]
|
||||||
|
movd mm1, [r0 + 0x20]
|
||||||
punpcklwd mm0, mm1
|
punpcklwd mm0, mm1
|
||||||
movd mm3, [eax + 0x40]
|
movd mm3, [r0 + 0x40]
|
||||||
movd mm1, [eax + 0x60]
|
movd mm1, [r0 + 0x60]
|
||||||
punpcklwd mm3, mm1
|
punpcklwd mm3, mm1
|
||||||
|
|
||||||
mov cx, 0
|
|
||||||
mov [eax], cx
|
|
||||||
mov [eax + 0x20], cx
|
|
||||||
mov [eax + 0x40], cx
|
|
||||||
mov [eax + 0x60], cx
|
|
||||||
|
|
||||||
;hdm_2x2, mm0 = dct0 dct1, mm3 = dct2 dct3
|
;hdm_2x2, mm0 = dct0 dct1, mm3 = dct2 dct3
|
||||||
movq mm5, mm3
|
movq mm5, mm3
|
||||||
paddw mm3, mm0
|
paddw mm3, mm0
|
||||||
@@ -231,17 +241,17 @@ WelsHadamardQuant2x2_mmx:
|
|||||||
punpcklwd mm1, mm3
|
punpcklwd mm1, mm3
|
||||||
|
|
||||||
;quant_2x2_dc
|
;quant_2x2_dc
|
||||||
mov ax, [mf]
|
;mov ax, [mf]
|
||||||
MMX_Copy4Times mm3, eax
|
MMX_Copy4Times mm3, r2d
|
||||||
mov cx, [ff]
|
;mov cx, [ff]
|
||||||
MMX_Copy4Times mm2, ecx
|
MMX_Copy4Times mm2, r1d
|
||||||
MMX_Quant4 mm1, mm0, mm2, mm3
|
MMX_Quant4 mm1, mm0, mm2, mm3
|
||||||
|
|
||||||
; store dct_2x2
|
; store dct_2x2
|
||||||
mov edx, [dct2x2]
|
;mov edx, [dct2x2]
|
||||||
movq [edx], mm1
|
movq [r3], mm1
|
||||||
mov ecx, [iChromaDc]
|
;mov ecx, [iChromaDc]
|
||||||
movq [ecx], mm1
|
movq [r4], mm1
|
||||||
|
|
||||||
; pNonZeroCount of dct_2x2
|
; pNonZeroCount of dct_2x2
|
||||||
pcmpeqb mm2, mm2 ; mm2 = FF
|
pcmpeqb mm2, mm2 ; mm2 = FF
|
||||||
@@ -250,9 +260,17 @@ WelsHadamardQuant2x2_mmx:
|
|||||||
pcmpeqb mm1, mm3 ; set FF if equal, 0 if not equal
|
pcmpeqb mm1, mm3 ; set FF if equal, 0 if not equal
|
||||||
psubsb mm1, mm2 ; set 0 if equal, 1 if not equal
|
psubsb mm1, mm2 ; set 0 if equal, 1 if not equal
|
||||||
psadbw mm1, mm3 ;
|
psadbw mm1, mm3 ;
|
||||||
movd eax, mm1
|
mov r1w, 0
|
||||||
|
mov [r0], r1w
|
||||||
|
mov [r0 + 0x20], r1w
|
||||||
|
mov [r0 + 0x40], r1w
|
||||||
|
mov [r0 + 0x60], r1w
|
||||||
|
|
||||||
|
|
||||||
|
movd retrd, mm1
|
||||||
|
|
||||||
WELSEMMS
|
WELSEMMS
|
||||||
|
LOAD_5_PARA_POP
|
||||||
ret
|
ret
|
||||||
|
|
||||||
;***********************************************************************
|
;***********************************************************************
|
||||||
@@ -261,13 +279,18 @@ WelsHadamardQuant2x2_mmx:
|
|||||||
WELS_EXTERN WelsHadamardQuant2x2Skip_mmx
|
WELS_EXTERN WelsHadamardQuant2x2Skip_mmx
|
||||||
align 16
|
align 16
|
||||||
WelsHadamardQuant2x2Skip_mmx:
|
WelsHadamardQuant2x2Skip_mmx:
|
||||||
|
%assign push_num 0
|
||||||
mov eax, [pDct]
|
LOAD_3_PARA
|
||||||
movd mm0, [eax]
|
%ifndef X86_32
|
||||||
movd mm1, [eax + 0x20]
|
movsx r1, r1w
|
||||||
|
movsx r2, r2w
|
||||||
|
%endif
|
||||||
|
;mov eax, [pDct]
|
||||||
|
movd mm0, [r0]
|
||||||
|
movd mm1, [r0 + 0x20]
|
||||||
punpcklwd mm0, mm1
|
punpcklwd mm0, mm1
|
||||||
movd mm3, [eax + 0x40]
|
movd mm3, [r0 + 0x40]
|
||||||
movd mm1, [eax + 0x60]
|
movd mm1, [r0 + 0x60]
|
||||||
punpcklwd mm3, mm1
|
punpcklwd mm3, mm1
|
||||||
|
|
||||||
;hdm_2x2, mm0 = dct0 dct1, mm3 = dct2 dct3
|
;hdm_2x2, mm0 = dct0 dct1, mm3 = dct2 dct3
|
||||||
@@ -283,10 +306,10 @@ WelsHadamardQuant2x2Skip_mmx:
|
|||||||
punpcklwd mm1, mm3
|
punpcklwd mm1, mm3
|
||||||
|
|
||||||
;quant_2x2_dc
|
;quant_2x2_dc
|
||||||
mov ax, [mf]
|
;mov ax, [mf]
|
||||||
MMX_Copy4Times mm3, eax
|
MMX_Copy4Times mm3, r2d
|
||||||
mov cx, [ff]
|
;mov cx, [ff]
|
||||||
MMX_Copy4Times mm2, ecx
|
MMX_Copy4Times mm2, r1d
|
||||||
MMX_Quant4 mm1, mm0, mm2, mm3
|
MMX_Quant4 mm1, mm0, mm2, mm3
|
||||||
|
|
||||||
; pNonZeroCount of dct_2x2
|
; pNonZeroCount of dct_2x2
|
||||||
@@ -296,7 +319,7 @@ WelsHadamardQuant2x2Skip_mmx:
|
|||||||
pcmpeqb mm1, mm3 ; set FF if equal, 0 if not equal
|
pcmpeqb mm1, mm3 ; set FF if equal, 0 if not equal
|
||||||
psubsb mm1, mm2 ; set 0 if equal, 1 if not equal
|
psubsb mm1, mm2 ; set 0 if equal, 1 if not equal
|
||||||
psadbw mm1, mm3 ;
|
psadbw mm1, mm3 ;
|
||||||
movd eax, mm1
|
movd retrd, mm1
|
||||||
|
|
||||||
WELSEMMS
|
WELSEMMS
|
||||||
ret
|
ret
|
||||||
@@ -317,12 +340,14 @@ align 16
|
|||||||
WELS_EXTERN WelsDequant4x4_sse2
|
WELS_EXTERN WelsDequant4x4_sse2
|
||||||
WelsDequant4x4_sse2:
|
WelsDequant4x4_sse2:
|
||||||
;ecx = dequant_mf[qp], edx = pDct
|
;ecx = dequant_mf[qp], edx = pDct
|
||||||
mov ecx, [esp + 8]
|
%assign push_num 0
|
||||||
mov edx, [esp + 4]
|
LOAD_2_PARA
|
||||||
|
;mov ecx, [esp + 8]
|
||||||
|
;mov edx, [esp + 4]
|
||||||
|
|
||||||
movdqa xmm1, [ecx]
|
movdqa xmm1, [r1]
|
||||||
SSE2_DeQuant8 [edx ], xmm0, xmm1
|
SSE2_DeQuant8 [r0 ], xmm0, xmm1
|
||||||
SSE2_DeQuant8 [edx+0x10 ], xmm0, xmm1
|
SSE2_DeQuant8 [r0 + 0x10], xmm0, xmm1
|
||||||
|
|
||||||
ret
|
ret
|
||||||
|
|
||||||
@@ -335,18 +360,20 @@ align 16
|
|||||||
WELS_EXTERN WelsDequantFour4x4_sse2
|
WELS_EXTERN WelsDequantFour4x4_sse2
|
||||||
WelsDequantFour4x4_sse2:
|
WelsDequantFour4x4_sse2:
|
||||||
;ecx = dequant_mf[qp], edx = pDct
|
;ecx = dequant_mf[qp], edx = pDct
|
||||||
mov ecx, [esp + 8]
|
%assign push_num 0
|
||||||
mov edx, [esp + 4]
|
LOAD_2_PARA
|
||||||
|
;mov ecx, [esp + 8]
|
||||||
|
;mov edx, [esp + 4]
|
||||||
|
|
||||||
movdqa xmm1, [ecx]
|
movdqa xmm1, [r1]
|
||||||
SSE2_DeQuant8 [edx ], xmm0, xmm1
|
SSE2_DeQuant8 [r0 ], xmm0, xmm1
|
||||||
SSE2_DeQuant8 [edx+0x10 ], xmm0, xmm1
|
SSE2_DeQuant8 [r0+0x10 ], xmm0, xmm1
|
||||||
SSE2_DeQuant8 [edx+0x20 ], xmm0, xmm1
|
SSE2_DeQuant8 [r0+0x20 ], xmm0, xmm1
|
||||||
SSE2_DeQuant8 [edx+0x30 ], xmm0, xmm1
|
SSE2_DeQuant8 [r0+0x30 ], xmm0, xmm1
|
||||||
SSE2_DeQuant8 [edx+0x40 ], xmm0, xmm1
|
SSE2_DeQuant8 [r0+0x40 ], xmm0, xmm1
|
||||||
SSE2_DeQuant8 [edx+0x50 ], xmm0, xmm1
|
SSE2_DeQuant8 [r0+0x50 ], xmm0, xmm1
|
||||||
SSE2_DeQuant8 [edx+0x60 ], xmm0, xmm1
|
SSE2_DeQuant8 [r0+0x60 ], xmm0, xmm1
|
||||||
SSE2_DeQuant8 [edx+0x70 ], xmm0, xmm1
|
SSE2_DeQuant8 [r0+0x70 ], xmm0, xmm1
|
||||||
|
|
||||||
ret
|
ret
|
||||||
|
|
||||||
@@ -356,14 +383,19 @@ WelsDequantFour4x4_sse2:
|
|||||||
WELS_EXTERN WelsDequantIHadamard4x4_sse2
|
WELS_EXTERN WelsDequantIHadamard4x4_sse2
|
||||||
align 16
|
align 16
|
||||||
WelsDequantIHadamard4x4_sse2:
|
WelsDequantIHadamard4x4_sse2:
|
||||||
mov eax, [esp + 4]
|
%assign push_num 0
|
||||||
mov cx, [esp + 8]
|
LOAD_2_PARA
|
||||||
|
%ifndef X86_32
|
||||||
|
movzx r1, r1w
|
||||||
|
%endif
|
||||||
|
;mov eax, [esp + 4]
|
||||||
|
;mov cx, [esp + 8]
|
||||||
|
|
||||||
; WelsDequantLumaDc4x4
|
; WelsDequantLumaDc4x4
|
||||||
SSE2_Copy8Times xmm1, ecx
|
SSE2_Copy8Times xmm1, r1d
|
||||||
;psrlw xmm1, 2 ; for the (>>2) in ihdm
|
;psrlw xmm1, 2 ; for the (>>2) in ihdm
|
||||||
MOVDQ xmm0, [eax]
|
MOVDQ xmm0, [r0]
|
||||||
MOVDQ xmm2, [eax+0x10]
|
MOVDQ xmm2, [r0+0x10]
|
||||||
pmullw xmm0, xmm1
|
pmullw xmm0, xmm1
|
||||||
pmullw xmm2, xmm1
|
pmullw xmm2, xmm1
|
||||||
|
|
||||||
@@ -386,8 +418,8 @@ WelsDequantIHadamard4x4_sse2:
|
|||||||
SSE2_TransTwo4x4W xmm0, xmm1, xmm4, xmm2, xmm3
|
SSE2_TransTwo4x4W xmm0, xmm1, xmm4, xmm2, xmm3
|
||||||
|
|
||||||
punpcklqdq xmm0, xmm1
|
punpcklqdq xmm0, xmm1
|
||||||
MOVDQ [eax], xmm0
|
MOVDQ [r0], xmm0
|
||||||
|
|
||||||
punpcklqdq xmm2, xmm3
|
punpcklqdq xmm2, xmm3
|
||||||
MOVDQ [eax+16], xmm2
|
MOVDQ [r0+16], xmm2
|
||||||
ret
|
ret
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -42,8 +42,6 @@
|
|||||||
|
|
||||||
%include "asm_inc.asm"
|
%include "asm_inc.asm"
|
||||||
|
|
||||||
bits 32
|
|
||||||
|
|
||||||
;***********************************************************************
|
;***********************************************************************
|
||||||
; Macros
|
; Macros
|
||||||
;***********************************************************************
|
;***********************************************************************
|
||||||
@@ -171,25 +169,34 @@ SECTION .text
|
|||||||
ALIGN 16
|
ALIGN 16
|
||||||
WELS_EXTERN WelsScan4x4DcAc_sse2
|
WELS_EXTERN WelsScan4x4DcAc_sse2
|
||||||
WelsScan4x4DcAc_sse2:
|
WelsScan4x4DcAc_sse2:
|
||||||
|
%ifdef X86_32
|
||||||
mov eax, [esp+8]
|
push r3
|
||||||
movdqa xmm0, [eax] ; 7 6 5 4 3 2 1 0
|
%assign push_num 1
|
||||||
movdqa xmm1, [eax+16] ; f e d c b a 9 8
|
%else
|
||||||
pextrw ecx, xmm0, 7 ; ecx = 7
|
%assign push_num 0
|
||||||
pextrw edx, xmm1, 2 ; edx = a
|
%endif
|
||||||
pextrw eax, xmm0, 5 ; eax = 5
|
LOAD_2_PARA
|
||||||
pinsrw xmm1, ecx, 2 ; f e d c b 7 9 8
|
;mov eax, [esp+8]
|
||||||
pinsrw xmm0, eax, 7 ; 5 6 5 4 3 2 1 0
|
movdqa xmm0, [r1] ; 7 6 5 4 3 2 1 0
|
||||||
pextrw ecx, xmm1, 0 ; ecx = 8
|
movdqa xmm1, [r1+16] ; f e d c b a 9 8
|
||||||
pinsrw xmm0, ecx, 5 ; 5 6 8 4 3 2 1 0
|
pextrw r2d, xmm0, 7 ; ecx = 7
|
||||||
pinsrw xmm1, edx, 0 ; f e d c b 7 9 a
|
pextrw r3d, xmm1, 2 ; edx = a
|
||||||
|
pextrw r1d, xmm0, 5 ; eax = 5
|
||||||
|
pinsrw xmm1, r2d, 2 ; f e d c b 7 9 8
|
||||||
|
pinsrw xmm0, r1d, 7 ; 5 6 5 4 3 2 1 0
|
||||||
|
pextrw r2d, xmm1, 0 ; ecx = 8
|
||||||
|
pinsrw xmm0, r2d, 5 ; 5 6 8 4 3 2 1 0
|
||||||
|
pinsrw xmm1, r3d, 0 ; f e d c b 7 9 a
|
||||||
pshufd xmm2, xmm0, 0xd8 ; 5 6 3 2 8 4 1 0
|
pshufd xmm2, xmm0, 0xd8 ; 5 6 3 2 8 4 1 0
|
||||||
pshufd xmm3, xmm1, 0xd8 ; f e b 7 d c 9 a
|
pshufd xmm3, xmm1, 0xd8 ; f e b 7 d c 9 a
|
||||||
pshufhw xmm0, xmm2, 0x93 ; 6 3 2 5 8 4 1 0
|
pshufhw xmm0, xmm2, 0x93 ; 6 3 2 5 8 4 1 0
|
||||||
pshuflw xmm1, xmm3, 0x39 ; f e b 7 a d c 9
|
pshuflw xmm1, xmm3, 0x39 ; f e b 7 a d c 9
|
||||||
mov eax, [esp+4]
|
;mov eax, [esp+4]
|
||||||
movdqa [eax],xmm0
|
movdqa [r0],xmm0
|
||||||
movdqa [eax+16], xmm1
|
movdqa [r0+16], xmm1
|
||||||
|
%ifdef X86_32
|
||||||
|
pop r3
|
||||||
|
%endif
|
||||||
ret
|
ret
|
||||||
|
|
||||||
;***********************************************************************
|
;***********************************************************************
|
||||||
@@ -198,19 +205,21 @@ WelsScan4x4DcAc_sse2:
|
|||||||
ALIGN 16
|
ALIGN 16
|
||||||
WELS_EXTERN WelsScan4x4DcAc_ssse3
|
WELS_EXTERN WelsScan4x4DcAc_ssse3
|
||||||
WelsScan4x4DcAc_ssse3:
|
WelsScan4x4DcAc_ssse3:
|
||||||
mov eax, [esp+8]
|
%assign push_num 0
|
||||||
movdqa xmm0, [eax]
|
LOAD_2_PARA
|
||||||
movdqa xmm1, [eax+16]
|
;mov eax, [esp+8]
|
||||||
pextrw ecx, xmm0, 7 ; ecx = [7]
|
movdqa xmm0, [r1]
|
||||||
pextrw eax, xmm1, 0 ; eax = [8]
|
movdqa xmm1, [r1+16]
|
||||||
pinsrw xmm0, eax, 7 ; xmm0[7] = [8]
|
pextrw r2d, xmm0, 7 ; ecx = [7]
|
||||||
pinsrw xmm1, ecx, 0 ; xmm1[0] = [7]
|
pextrw r1d, xmm1, 0 ; eax = [8]
|
||||||
|
pinsrw xmm0, r1d, 7 ; xmm0[7] = [8]
|
||||||
|
pinsrw xmm1, r2d, 0 ; xmm1[0] = [7]
|
||||||
pshufb xmm1, [pb_scanacdc_maskb]
|
pshufb xmm1, [pb_scanacdc_maskb]
|
||||||
pshufb xmm0, [pb_scanacdc_maska]
|
pshufb xmm0, [pb_scanacdc_maska]
|
||||||
|
|
||||||
mov eax, [esp+4]
|
;mov eax, [esp+4]
|
||||||
movdqa [eax],xmm0
|
movdqa [r0],xmm0
|
||||||
movdqa [eax+16], xmm1
|
movdqa [r0+16], xmm1
|
||||||
ret
|
ret
|
||||||
;***********************************************************************
|
;***********************************************************************
|
||||||
;void WelsScan4x4Ac_sse2( int16_t* zig_value, int16_t* pDct )
|
;void WelsScan4x4Ac_sse2( int16_t* zig_value, int16_t* pDct )
|
||||||
@@ -218,9 +227,11 @@ WelsScan4x4DcAc_ssse3:
|
|||||||
ALIGN 16
|
ALIGN 16
|
||||||
WELS_EXTERN WelsScan4x4Ac_sse2
|
WELS_EXTERN WelsScan4x4Ac_sse2
|
||||||
WelsScan4x4Ac_sse2:
|
WelsScan4x4Ac_sse2:
|
||||||
mov eax, [esp+8]
|
%assign push_num 0
|
||||||
movdqa xmm0, [eax]
|
LOAD_2_PARA
|
||||||
movdqa xmm1, [eax+16]
|
;mov eax, [esp+8]
|
||||||
|
movdqa xmm0, [r1]
|
||||||
|
movdqa xmm1, [r1+16]
|
||||||
movdqa xmm2, xmm0
|
movdqa xmm2, xmm0
|
||||||
punpcklqdq xmm0, xmm1
|
punpcklqdq xmm0, xmm1
|
||||||
punpckhqdq xmm2, xmm1
|
punpckhqdq xmm2, xmm1
|
||||||
@@ -228,14 +239,14 @@ WelsScan4x4Ac_sse2:
|
|||||||
movdqa xmm3, xmm0
|
movdqa xmm3, xmm0
|
||||||
punpckldq xmm0, xmm2
|
punpckldq xmm0, xmm2
|
||||||
punpckhdq xmm3, xmm2
|
punpckhdq xmm3, xmm2
|
||||||
pextrw eax , xmm0, 3
|
pextrw r1d , xmm0, 3
|
||||||
pextrw edx , xmm0, 7
|
pextrw r2d , xmm0, 7
|
||||||
pinsrw xmm0, eax, 7
|
pinsrw xmm0, r1d, 7
|
||||||
pextrw eax, xmm3, 4
|
pextrw r1d, xmm3, 4
|
||||||
pinsrw xmm3, edx, 4
|
pinsrw xmm3, r2d, 4
|
||||||
pextrw edx, xmm3, 0
|
pextrw r2d, xmm3, 0
|
||||||
pinsrw xmm3, eax, 0
|
pinsrw xmm3, r1d, 0
|
||||||
pinsrw xmm0, edx, 3
|
pinsrw xmm0, r2d, 3
|
||||||
|
|
||||||
pshufhw xmm1, xmm0, 0x93
|
pshufhw xmm1, xmm0, 0x93
|
||||||
pshuflw xmm2, xmm3, 0x39
|
pshuflw xmm2, xmm3, 0x39
|
||||||
@@ -245,9 +256,9 @@ WelsScan4x4Ac_sse2:
|
|||||||
pslldq xmm3, 14
|
pslldq xmm3, 14
|
||||||
por xmm1, xmm3
|
por xmm1, xmm3
|
||||||
psrldq xmm2, 2
|
psrldq xmm2, 2
|
||||||
mov eax, [esp+4]
|
;mov eax, [esp+4]
|
||||||
movdqa [eax],xmm1
|
movdqa [r0],xmm1
|
||||||
movdqa [eax+16], xmm2
|
movdqa [r0+16], xmm2
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
|
||||||
@@ -257,44 +268,60 @@ WelsScan4x4Ac_sse2:
|
|||||||
ALIGN 16
|
ALIGN 16
|
||||||
WELS_EXTERN WelsCalculateSingleCtr4x4_sse2
|
WELS_EXTERN WelsCalculateSingleCtr4x4_sse2
|
||||||
WelsCalculateSingleCtr4x4_sse2:
|
WelsCalculateSingleCtr4x4_sse2:
|
||||||
push ebx
|
;push ebx
|
||||||
mov eax, [esp+8]
|
;mov eax, [esp+8]
|
||||||
movdqa xmm0, [eax]
|
%ifdef X86_32
|
||||||
movdqa xmm1, [eax+16]
|
push r3
|
||||||
|
%assign push_num 1
|
||||||
|
%else
|
||||||
|
%assign push_num 0
|
||||||
|
%endif
|
||||||
|
LOAD_1_PARA
|
||||||
|
movdqa xmm0, [r0]
|
||||||
|
movdqa xmm1, [r0+16]
|
||||||
|
|
||||||
packsswb xmm0, xmm1
|
packsswb xmm0, xmm1
|
||||||
|
; below is the register map: r0 - eax, r1 - ebx, r2 - ecx, r3 - edx
|
||||||
|
xor r3, r3
|
||||||
pxor xmm3, xmm3
|
pxor xmm3, xmm3
|
||||||
pcmpeqb xmm0, xmm3
|
pcmpeqb xmm0, xmm3
|
||||||
pmovmskb edx, xmm0
|
pmovmskb r3d, xmm0
|
||||||
|
|
||||||
xor edx, 0xffff
|
xor r3, 0xffff
|
||||||
|
|
||||||
xor eax, eax
|
xor r0, r0
|
||||||
mov ecx, 7
|
mov r2, 7
|
||||||
mov ebx, 8
|
mov r1, 8
|
||||||
.loop_low8_find1:
|
.loop_low8_find1:
|
||||||
bt edx, ecx
|
bt r3, r2
|
||||||
jc .loop_high8_find1
|
jc .loop_high8_find1
|
||||||
loop .loop_low8_find1
|
dec r2
|
||||||
|
jnz .loop_low8_find1
|
||||||
.loop_high8_find1:
|
.loop_high8_find1:
|
||||||
bt edx, ebx
|
bt r3, r1
|
||||||
jc .find1end
|
jc .find1end
|
||||||
inc ebx
|
inc r1
|
||||||
cmp ebx,16
|
cmp r1,16
|
||||||
jb .loop_high8_find1
|
jb .loop_high8_find1
|
||||||
.find1end:
|
.find1end:
|
||||||
sub ebx, ecx
|
sub r1, r2
|
||||||
sub ebx, 1
|
sub r1, 1
|
||||||
add al, [i_ds_table+ebx]
|
lea r2, [i_ds_table]
|
||||||
mov ebx, edx
|
add r0b, [r2+r1]
|
||||||
and edx, 0xff
|
mov r1, r3
|
||||||
shr ebx, 8
|
and r3, 0xff
|
||||||
and ebx, 0xff
|
shr r1, 8
|
||||||
add al, [low_mask_table +edx]
|
and r1, 0xff
|
||||||
add al, [high_mask_table+ebx]
|
lea r2 , [low_mask_table]
|
||||||
|
add r0b, [r2 +r3]
|
||||||
pop ebx
|
lea r2, [high_mask_table]
|
||||||
|
add r0b, [r2+r1]
|
||||||
|
%ifdef X86_32
|
||||||
|
pop r3
|
||||||
|
%else
|
||||||
|
mov retrd, r0d
|
||||||
|
%endif
|
||||||
|
;pop ebx
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
|
||||||
@@ -304,21 +331,29 @@ WelsCalculateSingleCtr4x4_sse2:
|
|||||||
ALIGN 16
|
ALIGN 16
|
||||||
WELS_EXTERN WelsGetNoneZeroCount_sse2
|
WELS_EXTERN WelsGetNoneZeroCount_sse2
|
||||||
WelsGetNoneZeroCount_sse2:
|
WelsGetNoneZeroCount_sse2:
|
||||||
mov eax, [esp+4]
|
%assign push_num 0
|
||||||
movdqa xmm0, [eax]
|
LOAD_1_PARA
|
||||||
movdqa xmm1, [eax+16]
|
;mov eax, [esp+4]
|
||||||
|
movdqa xmm0, [r0]
|
||||||
|
movdqa xmm1, [r0+16]
|
||||||
pxor xmm2, xmm2
|
pxor xmm2, xmm2
|
||||||
pcmpeqw xmm0, xmm2
|
pcmpeqw xmm0, xmm2
|
||||||
pcmpeqw xmm1, xmm2
|
pcmpeqw xmm1, xmm2
|
||||||
packsswb xmm1, xmm0
|
packsswb xmm1, xmm0
|
||||||
pmovmskb edx, xmm1
|
xor r1, r1
|
||||||
xor edx, 0xffff
|
pmovmskb r1d, xmm1
|
||||||
mov ecx, edx
|
xor r1d, 0xffff
|
||||||
and edx, 0xff
|
mov r2, r1
|
||||||
shr ecx, 8
|
and r1, 0xff
|
||||||
|
shr r2, 8
|
||||||
; and ecx, 0xff ; we do not need this due to high 16bits equal to 0 yet
|
; and ecx, 0xff ; we do not need this due to high 16bits equal to 0 yet
|
||||||
xor eax, eax
|
; xor retr, retr
|
||||||
add al, [nozero_count_table+ecx]
|
;add al, [nozero_count_table+r2]
|
||||||
add al, [nozero_count_table+edx]
|
lea r0 , [nozero_count_table]
|
||||||
|
movzx r2, byte [r0+r2]
|
||||||
|
movzx r1, byte [r0+r1]
|
||||||
|
mov retrq, r2
|
||||||
|
add retrq, r1
|
||||||
|
;add al, [nozero_count_table+r1]
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
|||||||
@@ -61,15 +61,15 @@ void McCopyWidthEq4_mmx (uint8_t*, int32_t, uint8_t*, int32_t, int32_t);
|
|||||||
void McCopyWidthEq8_mmx (uint8_t*, int32_t, uint8_t*, int32_t, int32_t);
|
void McCopyWidthEq8_mmx (uint8_t*, int32_t, uint8_t*, int32_t, int32_t);
|
||||||
void PixelAvgWidthEq8_mmx (uint8_t*, int32_t, uint8_t*, int32_t, uint8_t*, int32_t, int32_t);
|
void PixelAvgWidthEq8_mmx (uint8_t*, int32_t, uint8_t*, int32_t, uint8_t*, int32_t, int32_t);
|
||||||
|
|
||||||
void McHorVer20_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth,
|
void McHorVer20Width9Or17_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth,
|
||||||
int32_t iHeight);
|
int32_t iHeight);
|
||||||
void McHorVer02_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth,
|
void McHorVer02Height9Or17_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth,
|
||||||
int32_t iHeight);
|
int32_t iHeight);
|
||||||
void McHorVer22HorFirst_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pTap, int32_t iTapStride, int32_t iWidth,
|
void McHorVer22HorFirst_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pTap, int32_t iTapStride, int32_t iWidth,
|
||||||
int32_t iHeight);
|
int32_t iHeight);
|
||||||
void McHorVer22VerLastAlign_sse2 (uint8_t* pTap, int32_t iTapStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth,
|
void McHorVer22Width8VerLastAlign_sse2 (uint8_t* pTap, int32_t iTapStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth,
|
||||||
int32_t iHeight);
|
int32_t iHeight);
|
||||||
void McHorVer22VerLastUnAlign_sse2 (uint8_t* pTap, int32_t iTapStride, uint8_t* pDst, int32_t iDstStride,
|
void McHorVer22Width8VerLastUnAlign_sse2 (uint8_t* pTap, int32_t iTapStride, uint8_t* pDst, int32_t iDstStride,
|
||||||
int32_t iWidth, int32_t iHeight);
|
int32_t iWidth, int32_t iHeight);
|
||||||
void McChromaWidthEq8_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, const uint8_t* kpABCD,
|
void McChromaWidthEq8_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, const uint8_t* kpABCD,
|
||||||
int32_t iHeigh);
|
int32_t iHeigh);
|
||||||
@@ -80,8 +80,6 @@ void McHorVer22Width8HorFirst_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t*
|
|||||||
int32_t iHeight);
|
int32_t iHeight);
|
||||||
void PixelAvgWidthEq16_sse2 (uint8_t*, int32_t, uint8_t*, int32_t, uint8_t*, int32_t, int32_t);
|
void PixelAvgWidthEq16_sse2 (uint8_t*, int32_t, uint8_t*, int32_t, uint8_t*, int32_t, int32_t);
|
||||||
|
|
||||||
|
|
||||||
void PixelAvgWidthEq16_ssse3 (uint8_t*, int32_t, uint8_t*, int32_t, uint8_t*, int32_t, int32_t);
|
|
||||||
void McChromaWidthEq8_ssse3 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
void McChromaWidthEq8_ssse3 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||||
const uint8_t* kpABCD, int32_t iHeigh);
|
const uint8_t* kpABCD, int32_t iHeigh);
|
||||||
|
|
||||||
|
|||||||
@@ -107,9 +107,6 @@ int32_t WelsIntra16x16Combined3Satd_sse41 (uint8_t*, int32_t, uint8_t*, int32_t,
|
|||||||
int32_t WelsIntra16x16Combined3Sad_ssse3 (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*, int32_t, uint8_t*);
|
int32_t WelsIntra16x16Combined3Sad_ssse3 (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*, int32_t, uint8_t*);
|
||||||
int32_t WelsIntraChroma8x8Combined3Satd_sse41 (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*, int32_t, uint8_t*,
|
int32_t WelsIntraChroma8x8Combined3Satd_sse41 (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*, int32_t, uint8_t*,
|
||||||
uint8_t*, uint8_t*);
|
uint8_t*, uint8_t*);
|
||||||
int32_t WelsIntraChroma8x8Combined3Sad_ssse3 (uint8_t*, int32_t, uint8_t*, int32_t, int32_t*, int32_t, uint8_t*,
|
|
||||||
uint8_t*, uint8_t*);
|
|
||||||
|
|
||||||
|
|
||||||
#endif//X86_ASM
|
#endif//X86_ASM
|
||||||
|
|
||||||
|
|||||||
@@ -261,15 +261,15 @@ void WelsInitReconstructionFuncs (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFl
|
|||||||
|
|
||||||
#if defined(X86_ASM)
|
#if defined(X86_ASM)
|
||||||
if (uiCpuFlag & WELS_CPU_MMXEXT) {
|
if (uiCpuFlag & WELS_CPU_MMXEXT) {
|
||||||
pFuncList->pfIDctT4 = WelsIDctT4Rec_mmx;
|
// pFuncList->pfIDctT4 = WelsIDctT4Rec_mmx;
|
||||||
}
|
}
|
||||||
if (uiCpuFlag & WELS_CPU_SSE2) {
|
if (uiCpuFlag & WELS_CPU_SSE2) {
|
||||||
pFuncList->pfDequantization4x4 = WelsDequant4x4_sse2;
|
/* pFuncList->pfDequantization4x4 = WelsDequant4x4_sse2;
|
||||||
pFuncList->pfDequantizationFour4x4 = WelsDequantFour4x4_sse2;
|
pFuncList->pfDequantizationFour4x4 = WelsDequantFour4x4_sse2;
|
||||||
pFuncList->pfDequantizationIHadamard4x4 = WelsDequantIHadamard4x4_sse2;
|
pFuncList->pfDequantizationIHadamard4x4 = WelsDequantIHadamard4x4_sse2;
|
||||||
|
|
||||||
pFuncList->pfIDctFourT4 = WelsIDctFourT4Rec_sse2;
|
pFuncList->pfIDctFourT4 = WelsIDctFourT4Rec_sse2;
|
||||||
pFuncList->pfIDctI16x16Dc = WelsIDctRecI16x16Dc_sse2;
|
pFuncList->pfIDctI16x16Dc = WelsIDctRecI16x16Dc_sse2;*/
|
||||||
}
|
}
|
||||||
#endif//X86_ASM
|
#endif//X86_ASM
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -569,7 +569,7 @@ void WelsInitEncodingFuncs (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag) {
|
|||||||
}
|
}
|
||||||
//#ifndef MACOS
|
//#ifndef MACOS
|
||||||
if (uiCpuFlag & WELS_CPU_SSSE3) {
|
if (uiCpuFlag & WELS_CPU_SSSE3) {
|
||||||
pFuncList->pfScan4x4 = WelsScan4x4DcAc_ssse3;
|
// pFuncList->pfScan4x4 = WelsScan4x4DcAc_ssse3;
|
||||||
}
|
}
|
||||||
|
|
||||||
//#endif//MACOS
|
//#endif//MACOS
|
||||||
|
|||||||
@@ -1919,7 +1919,7 @@ void OutputCpuFeaturesLog (uint32_t uiCpuFeatureFlags, uint32_t uiCpuCores, int3
|
|||||||
uiCpuCores,
|
uiCpuCores,
|
||||||
iCacheLineSize);
|
iCacheLineSize);
|
||||||
|
|
||||||
#ifdef _DEBUG // output at console & _debug
|
//#ifdef _DEBUG // output at console & _debug
|
||||||
fprintf (stderr, "WELS CPU features/capacities (0x%x) detected: \n" \
|
fprintf (stderr, "WELS CPU features/capacities (0x%x) detected: \n" \
|
||||||
"HTT: %c, " \
|
"HTT: %c, " \
|
||||||
"MMX: %c, " \
|
"MMX: %c, " \
|
||||||
@@ -1962,7 +1962,7 @@ void OutputCpuFeaturesLog (uint32_t uiCpuFeatureFlags, uint32_t uiCpuCores, int3
|
|||||||
(uiCpuFeatureFlags & WELS_CPU_AES) ? 'Y' : 'N',
|
(uiCpuFeatureFlags & WELS_CPU_AES) ? 'Y' : 'N',
|
||||||
uiCpuCores,
|
uiCpuCores,
|
||||||
iCacheLineSize);
|
iCacheLineSize);
|
||||||
#endif//_DEBUG
|
//#endif//_DEBUG
|
||||||
}
|
}
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
|
|||||||
@@ -29,14 +29,12 @@
|
|||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include "expand_pic.h"
|
#include "expand_pic.h"
|
||||||
#include "cpu_core.h"
|
#include "cpu_core.h"
|
||||||
#include "wels_func_ptr_def.h"
|
#include "wels_func_ptr_def.h"
|
||||||
|
|
||||||
namespace WelsSVCEnc{
|
namespace WelsSVCEnc{
|
||||||
|
|
||||||
// rewrite it (split into luma & chroma) that is helpful for mmx/sse2 optimization perform, 9/27/2009
|
// rewrite it (split into luma & chroma) that is helpful for mmx/sse2 optimization perform, 9/27/2009
|
||||||
static inline void ExpandPictureLuma_c (uint8_t* pDst, const int32_t kiStride, const int32_t kiPicW,
|
static inline void ExpandPictureLuma_c (uint8_t* pDst, const int32_t kiStride, const int32_t kiPicW,
|
||||||
const int32_t kiPicH) {
|
const int32_t kiPicH) {
|
||||||
@@ -144,6 +142,8 @@ void ExpandReferencingPicture (SPicture* pPic, PExpandPictureFunc pExpLuma, PExp
|
|||||||
const int32_t kiWidthUV = kiWidthY >> 1;
|
const int32_t kiWidthUV = kiWidthY >> 1;
|
||||||
const int32_t kiHeightUV = kiHeightY >> 1;
|
const int32_t kiHeightUV = kiHeightY >> 1;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
pExpLuma (pPicY, pPic->iLineSize[0], kiWidthY, kiHeightY);
|
pExpLuma (pPicY, pPic->iLineSize[0], kiWidthY, kiHeightY);
|
||||||
if (kiWidthUV >= 16) {
|
if (kiWidthUV >= 16) {
|
||||||
// fix coding picture size as 16x16
|
// fix coding picture size as 16x16
|
||||||
@@ -155,6 +155,7 @@ void ExpandReferencingPicture (SPicture* pPic, PExpandPictureFunc pExpLuma, PExp
|
|||||||
ExpandPictureChroma_c (pPicCb, pPic->iLineSize[1], kiWidthUV, kiHeightUV);
|
ExpandPictureChroma_c (pPicCb, pPic->iLineSize[1], kiWidthUV, kiHeightUV);
|
||||||
ExpandPictureChroma_c (pPicCr, pPic->iLineSize[2], kiWidthUV, kiHeightUV);
|
ExpandPictureChroma_c (pPicCr, pPic->iLineSize[2], kiWidthUV, kiHeightUV);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -80,13 +80,13 @@ void WelsInitFillingPredFuncs (const uint32_t kuiCpuFlag) {
|
|||||||
|
|
||||||
#if defined(X86_ASM)
|
#if defined(X86_ASM)
|
||||||
if (kuiCpuFlag & WELS_CPU_MMXEXT) {
|
if (kuiCpuFlag & WELS_CPU_MMXEXT) {
|
||||||
WelsFillingPred8to16 = WelsFillingPred8to16_mmx;
|
// WelsFillingPred8to16 = WelsFillingPred8to16_mmx;
|
||||||
WelsFillingPred8x2to16 = WelsFillingPred8x2to16_mmx;
|
// WelsFillingPred8x2to16 = WelsFillingPred8x2to16_mmx;
|
||||||
WelsFillingPred1to16 = WelsFillingPred1to16_mmx;
|
// WelsFillingPred1to16 = WelsFillingPred1to16_mmx;
|
||||||
}
|
}
|
||||||
if (kuiCpuFlag & WELS_CPU_SSE2) {
|
if (kuiCpuFlag & WELS_CPU_SSE2) {
|
||||||
WelsFillingPred8x2to16 = WelsFillingPred8x2to16_sse2;
|
// WelsFillingPred8x2to16 = WelsFillingPred8x2to16_sse2;
|
||||||
WelsFillingPred1to16 = WelsFillingPred1to16_sse2;
|
// WelsFillingPred1to16 = WelsFillingPred1to16_sse2;
|
||||||
}
|
}
|
||||||
#endif//X86_ASM
|
#endif//X86_ASM
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -426,7 +426,7 @@ static inline void McHorVer22WidthEq8_sse2 (uint8_t* pSrc, int32_t iSrcStride, u
|
|||||||
int32_t iHeight) {
|
int32_t iHeight) {
|
||||||
ENFORCE_STACK_ALIGN_2D (int16_t, pTap, 21, 8, 16)
|
ENFORCE_STACK_ALIGN_2D (int16_t, pTap, 21, 8, 16)
|
||||||
McHorVer22Width8HorFirst_sse2 (pSrc - 2, iSrcStride, (uint8_t*)pTap, 16, iHeight + 5);
|
McHorVer22Width8HorFirst_sse2 (pSrc - 2, iSrcStride, (uint8_t*)pTap, 16, iHeight + 5);
|
||||||
McHorVer22VerLastAlign_sse2 ((uint8_t*)pTap, 16, pDst, iDstStride, 8, iHeight);
|
McHorVer22Width8VerLastAlign_sse2 ((uint8_t*)pTap, 16, pDst, iDstStride, 8, iHeight);
|
||||||
}
|
}
|
||||||
|
|
||||||
//2010.2.5
|
//2010.2.5
|
||||||
@@ -441,13 +441,13 @@ static inline void McHorVer22WidthEq16_sse2 (uint8_t* pSrc, int32_t iSrcStride,
|
|||||||
McHorVer22WidthEq8_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
McHorVer22WidthEq8_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
|
||||||
McHorVer22WidthEq8_sse2 (&pSrc[8], iSrcStride, &pDst[8], iDstStride, iHeight);
|
McHorVer22WidthEq8_sse2 (&pSrc[8], iSrcStride, &pDst[8], iDstStride, iHeight);
|
||||||
}
|
}
|
||||||
void McHorVer22_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth,
|
void McHorVer22Width9Or17Height9Or17_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth,
|
||||||
int32_t iHeight) {
|
int32_t iHeight) {
|
||||||
ENFORCE_STACK_ALIGN_2D (int16_t, pTap, 22, 24, 16)
|
ENFORCE_STACK_ALIGN_2D (int16_t, pTap, 22, 24, 16)
|
||||||
int32_t tmp1 = 2 * (iWidth - 8);
|
int32_t tmp1 = 2 * (iWidth - 8);
|
||||||
McHorVer22HorFirst_sse2 (pSrc - 2, iSrcStride, (uint8_t*)pTap, 48, iWidth, iHeight + 5);
|
McHorVer22HorFirst_sse2 (pSrc - 2, iSrcStride, (uint8_t*)pTap, 48, iWidth, iHeight + 5);
|
||||||
McHorVer22VerLastAlign_sse2 ((uint8_t*)pTap, 48, pDst, iDstStride, iWidth - 1, iHeight);
|
McHorVer22Width8VerLastAlign_sse2 ((uint8_t*)pTap, 48, pDst, iDstStride, iWidth - 1, iHeight);
|
||||||
McHorVer22VerLastUnAlign_sse2 ((uint8_t*)pTap + tmp1, 48, pDst + iWidth - 8, iDstStride, 8, iHeight);
|
McHorVer22Width8VerLastUnAlign_sse2 ((uint8_t*)pTap + tmp1, 48, pDst + iWidth - 8, iDstStride, 8, iHeight);
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef void (*McChromaWidthEqx) (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
typedef void (*McChromaWidthEqx) (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
|
||||||
@@ -523,9 +523,9 @@ void WelsInitMcFuncs (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag) {
|
|||||||
pFuncList->sMcFuncs.pfLumaQuarpelMc = pWelsMcFuncWidthEq16;
|
pFuncList->sMcFuncs.pfLumaQuarpelMc = pWelsMcFuncWidthEq16;
|
||||||
#if defined (X86_ASM)
|
#if defined (X86_ASM)
|
||||||
if (uiCpuFlag & WELS_CPU_SSE2) {
|
if (uiCpuFlag & WELS_CPU_SSE2) {
|
||||||
pFuncList->sMcFuncs.pfLumaHalfpelHor = McHorVer20_sse2;
|
pFuncList->sMcFuncs.pfLumaHalfpelHor = McHorVer20Width9Or17_sse2;
|
||||||
pFuncList->sMcFuncs.pfLumaHalfpelVer = McHorVer02_sse2;
|
pFuncList->sMcFuncs.pfLumaHalfpelVer = McHorVer02Height9Or17_sse2;
|
||||||
pFuncList->sMcFuncs.pfLumaHalfpelCen = McHorVer22_sse2;
|
pFuncList->sMcFuncs.pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_sse2;
|
||||||
pFuncList->sMcFuncs.pfSampleAveraging[0] = PixelAvgWidthEq8_mmx;
|
pFuncList->sMcFuncs.pfSampleAveraging[0] = PixelAvgWidthEq8_mmx;
|
||||||
pFuncList->sMcFuncs.pfSampleAveraging[1] = PixelAvgWidthEq16_sse2;
|
pFuncList->sMcFuncs.pfSampleAveraging[1] = PixelAvgWidthEq16_sse2;
|
||||||
pFuncList->sMcFuncs.pfChromaMc = McChroma_sse2;
|
pFuncList->sMcFuncs.pfChromaMc = McChroma_sse2;
|
||||||
@@ -541,7 +541,6 @@ void WelsInitMcFuncs (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag) {
|
|||||||
|
|
||||||
if (uiCpuFlag & WELS_CPU_SSSE3) {
|
if (uiCpuFlag & WELS_CPU_SSSE3) {
|
||||||
pFuncList->sMcFuncs.pfChromaMc = McChroma_ssse3;
|
pFuncList->sMcFuncs.pfChromaMc = McChroma_ssse3;
|
||||||
pFuncList->sMcFuncs.pfSampleAveraging[1] = PixelAvgWidthEq16_ssse3;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif //(X86_ASM)
|
#endif //(X86_ASM)
|
||||||
|
|||||||
@@ -439,7 +439,7 @@ uint8_t MdInterAnalysisVaaInfo_c (int32_t* pSad8x8) {
|
|||||||
return (uiMbSign);
|
return (uiMbSign);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int32_t AnalysisVaaInfoIntra_c (uint8_t* pDataY, const int32_t kiLineSize) {
|
int32_t AnalysisVaaInfoIntra_c (uint8_t* pDataY, const int32_t kiLineSize) {
|
||||||
ENFORCE_STACK_ALIGN_1D (uint16_t, uiAvgBlock, 16, 16)
|
ENFORCE_STACK_ALIGN_1D (uint16_t, uiAvgBlock, 16, 16)
|
||||||
uint16_t* pBlock = &uiAvgBlock[0];
|
uint16_t* pBlock = &uiAvgBlock[0];
|
||||||
uint8_t* pEncData = pDataY;
|
uint8_t* pEncData = pDataY;
|
||||||
|
|||||||
@@ -465,11 +465,11 @@ void WelsInitSampleSadFunc (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag) {
|
|||||||
pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x16 ] = WelsSampleSatd8x16_sse2;
|
pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x16 ] = WelsSampleSatd8x16_sse2;
|
||||||
pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_16x8 ] = WelsSampleSatd16x8_sse2;
|
pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_16x8 ] = WelsSampleSatd16x8_sse2;
|
||||||
pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_16x16] = WelsSampleSatd16x16_sse2;
|
pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_16x16] = WelsSampleSatd16x16_sse2;
|
||||||
pFuncList->sSampleDealingFuncs.pfIntra4x4Combined3Satd = WelsSmpleSatdThree4x4_sse2;
|
//pFuncList->sSampleDealingFuncs.pfIntra4x4Combined3Satd = WelsSmpleSatdThree4x4_sse2;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (uiCpuFlag & WELS_CPU_SSSE3) {
|
if (uiCpuFlag & WELS_CPU_SSSE3) {
|
||||||
pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3Sad = WelsIntra16x16Combined3Sad_ssse3;
|
//pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3Sad = WelsIntra16x16Combined3Sad_ssse3;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (uiCpuFlag & WELS_CPU_SSE41) {
|
if (uiCpuFlag & WELS_CPU_SSE41) {
|
||||||
@@ -478,8 +478,8 @@ void WelsInitSampleSadFunc (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag) {
|
|||||||
pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x16] = WelsSampleSatd8x16_sse41;
|
pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x16] = WelsSampleSatd8x16_sse41;
|
||||||
pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x8] = WelsSampleSatd8x8_sse41;
|
pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x8] = WelsSampleSatd8x8_sse41;
|
||||||
pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_4x4] = WelsSampleSatd4x4_sse41;
|
pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_4x4] = WelsSampleSatd4x4_sse41;
|
||||||
pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3Satd = WelsIntra16x16Combined3Satd_sse41;
|
//pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3Satd = WelsIntra16x16Combined3Satd_sse41;
|
||||||
pFuncList->sSampleDealingFuncs.pfIntra8x8Combined3Satd = WelsIntraChroma8x8Combined3Satd_sse41;
|
//pFuncList->sSampleDealingFuncs.pfIntra8x8Combined3Satd = WelsIntraChroma8x8Combined3Satd_sse41;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif //(X86_ASM)
|
#endif //(X86_ASM)
|
||||||
|
|||||||
@@ -207,7 +207,7 @@ void InitCoeffFunc (const uint32_t uiCpuFlag) {
|
|||||||
|
|
||||||
#if defined(X86_ASM)
|
#if defined(X86_ASM)
|
||||||
if (uiCpuFlag & WELS_CPU_SSE2) {
|
if (uiCpuFlag & WELS_CPU_SSE2) {
|
||||||
sCoeffFunc.pfCavlcParamCal = CavlcParamCal_sse2;
|
// sCoeffFunc.pfCavlcParamCal = CavlcParamCal_sse2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -38,22 +38,13 @@ ENCODER_CPP_SRCS=\
|
|||||||
ENCODER_OBJS += $(ENCODER_CPP_SRCS:.cpp=.o)
|
ENCODER_OBJS += $(ENCODER_CPP_SRCS:.cpp=.o)
|
||||||
ifeq ($(USE_ASM), Yes)
|
ifeq ($(USE_ASM), Yes)
|
||||||
ENCODER_ASM_SRCS=\
|
ENCODER_ASM_SRCS=\
|
||||||
$(ENCODER_SRCDIR)/./core/asm/asm_inc.asm\
|
|
||||||
$(ENCODER_SRCDIR)/./core/asm/coeff.asm\
|
$(ENCODER_SRCDIR)/./core/asm/coeff.asm\
|
||||||
$(ENCODER_SRCDIR)/./core/asm/cpuid.asm\
|
|
||||||
$(ENCODER_SRCDIR)/./core/asm/dct.asm\
|
$(ENCODER_SRCDIR)/./core/asm/dct.asm\
|
||||||
$(ENCODER_SRCDIR)/./core/asm/deblock.asm\
|
|
||||||
$(ENCODER_SRCDIR)/./core/asm/expand_picture.asm\
|
|
||||||
$(ENCODER_SRCDIR)/./core/asm/intra_pred.asm\
|
$(ENCODER_SRCDIR)/./core/asm/intra_pred.asm\
|
||||||
$(ENCODER_SRCDIR)/./core/asm/intra_pred_util.asm\
|
|
||||||
$(ENCODER_SRCDIR)/./core/asm/mb_copy.asm\
|
|
||||||
$(ENCODER_SRCDIR)/./core/asm/mc_chroma.asm\
|
|
||||||
$(ENCODER_SRCDIR)/./core/asm/mc_luma.asm\
|
|
||||||
$(ENCODER_SRCDIR)/./core/asm/memzero.asm\
|
$(ENCODER_SRCDIR)/./core/asm/memzero.asm\
|
||||||
$(ENCODER_SRCDIR)/./core/asm/quant.asm\
|
$(ENCODER_SRCDIR)/./core/asm/quant.asm\
|
||||||
$(ENCODER_SRCDIR)/./core/asm/satd_sad.asm\
|
$(ENCODER_SRCDIR)/./core/asm/satd_sad.asm\
|
||||||
$(ENCODER_SRCDIR)/./core/asm/score.asm\
|
$(ENCODER_SRCDIR)/./core/asm/score.asm\
|
||||||
$(ENCODER_SRCDIR)/./core/asm/vaa.asm\
|
|
||||||
|
|
||||||
ENCODER_OBJS += $(ENCODER_ASM_SRCS:.asm=.o)
|
ENCODER_OBJS += $(ENCODER_ASM_SRCS:.asm=.o)
|
||||||
endif
|
endif
|
||||||
@@ -158,39 +149,15 @@ $(ENCODER_SRCDIR)/./plus/src/welsCodecTrace.o: $(ENCODER_SRCDIR)/./plus/src/wels
|
|||||||
$(ENCODER_SRCDIR)/./plus/src/welsEncoderExt.o: $(ENCODER_SRCDIR)/./plus/src/welsEncoderExt.cpp
|
$(ENCODER_SRCDIR)/./plus/src/welsEncoderExt.o: $(ENCODER_SRCDIR)/./plus/src/welsEncoderExt.cpp
|
||||||
$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(ENCODER_CFLAGS) $(ENCODER_INCLUDES) -c -o $(ENCODER_SRCDIR)/./plus/src/welsEncoderExt.o $(ENCODER_SRCDIR)/./plus/src/welsEncoderExt.cpp
|
$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(ENCODER_CFLAGS) $(ENCODER_INCLUDES) -c -o $(ENCODER_SRCDIR)/./plus/src/welsEncoderExt.o $(ENCODER_SRCDIR)/./plus/src/welsEncoderExt.cpp
|
||||||
|
|
||||||
$(ENCODER_SRCDIR)/./core/asm/asm_inc.o: $(ENCODER_SRCDIR)/./core/asm/asm_inc.asm
|
|
||||||
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(ENCODER_ASMFLAGS) $(ENCODER_ASM_INCLUDES) -o $(ENCODER_SRCDIR)/./core/asm/asm_inc.o $(ENCODER_SRCDIR)/./core/asm/asm_inc.asm
|
|
||||||
|
|
||||||
$(ENCODER_SRCDIR)/./core/asm/coeff.o: $(ENCODER_SRCDIR)/./core/asm/coeff.asm
|
$(ENCODER_SRCDIR)/./core/asm/coeff.o: $(ENCODER_SRCDIR)/./core/asm/coeff.asm
|
||||||
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(ENCODER_ASMFLAGS) $(ENCODER_ASM_INCLUDES) -o $(ENCODER_SRCDIR)/./core/asm/coeff.o $(ENCODER_SRCDIR)/./core/asm/coeff.asm
|
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(ENCODER_ASMFLAGS) $(ENCODER_ASM_INCLUDES) -o $(ENCODER_SRCDIR)/./core/asm/coeff.o $(ENCODER_SRCDIR)/./core/asm/coeff.asm
|
||||||
|
|
||||||
$(ENCODER_SRCDIR)/./core/asm/cpuid.o: $(ENCODER_SRCDIR)/./core/asm/cpuid.asm
|
|
||||||
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(ENCODER_ASMFLAGS) $(ENCODER_ASM_INCLUDES) -o $(ENCODER_SRCDIR)/./core/asm/cpuid.o $(ENCODER_SRCDIR)/./core/asm/cpuid.asm
|
|
||||||
|
|
||||||
$(ENCODER_SRCDIR)/./core/asm/dct.o: $(ENCODER_SRCDIR)/./core/asm/dct.asm
|
$(ENCODER_SRCDIR)/./core/asm/dct.o: $(ENCODER_SRCDIR)/./core/asm/dct.asm
|
||||||
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(ENCODER_ASMFLAGS) $(ENCODER_ASM_INCLUDES) -o $(ENCODER_SRCDIR)/./core/asm/dct.o $(ENCODER_SRCDIR)/./core/asm/dct.asm
|
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(ENCODER_ASMFLAGS) $(ENCODER_ASM_INCLUDES) -o $(ENCODER_SRCDIR)/./core/asm/dct.o $(ENCODER_SRCDIR)/./core/asm/dct.asm
|
||||||
|
|
||||||
$(ENCODER_SRCDIR)/./core/asm/deblock.o: $(ENCODER_SRCDIR)/./core/asm/deblock.asm
|
|
||||||
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(ENCODER_ASMFLAGS) $(ENCODER_ASM_INCLUDES) -o $(ENCODER_SRCDIR)/./core/asm/deblock.o $(ENCODER_SRCDIR)/./core/asm/deblock.asm
|
|
||||||
|
|
||||||
$(ENCODER_SRCDIR)/./core/asm/expand_picture.o: $(ENCODER_SRCDIR)/./core/asm/expand_picture.asm
|
|
||||||
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(ENCODER_ASMFLAGS) $(ENCODER_ASM_INCLUDES) -o $(ENCODER_SRCDIR)/./core/asm/expand_picture.o $(ENCODER_SRCDIR)/./core/asm/expand_picture.asm
|
|
||||||
|
|
||||||
$(ENCODER_SRCDIR)/./core/asm/intra_pred.o: $(ENCODER_SRCDIR)/./core/asm/intra_pred.asm
|
$(ENCODER_SRCDIR)/./core/asm/intra_pred.o: $(ENCODER_SRCDIR)/./core/asm/intra_pred.asm
|
||||||
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(ENCODER_ASMFLAGS) $(ENCODER_ASM_INCLUDES) -o $(ENCODER_SRCDIR)/./core/asm/intra_pred.o $(ENCODER_SRCDIR)/./core/asm/intra_pred.asm
|
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(ENCODER_ASMFLAGS) $(ENCODER_ASM_INCLUDES) -o $(ENCODER_SRCDIR)/./core/asm/intra_pred.o $(ENCODER_SRCDIR)/./core/asm/intra_pred.asm
|
||||||
|
|
||||||
$(ENCODER_SRCDIR)/./core/asm/intra_pred_util.o: $(ENCODER_SRCDIR)/./core/asm/intra_pred_util.asm
|
|
||||||
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(ENCODER_ASMFLAGS) $(ENCODER_ASM_INCLUDES) -o $(ENCODER_SRCDIR)/./core/asm/intra_pred_util.o $(ENCODER_SRCDIR)/./core/asm/intra_pred_util.asm
|
|
||||||
|
|
||||||
$(ENCODER_SRCDIR)/./core/asm/mb_copy.o: $(ENCODER_SRCDIR)/./core/asm/mb_copy.asm
|
|
||||||
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(ENCODER_ASMFLAGS) $(ENCODER_ASM_INCLUDES) -o $(ENCODER_SRCDIR)/./core/asm/mb_copy.o $(ENCODER_SRCDIR)/./core/asm/mb_copy.asm
|
|
||||||
|
|
||||||
$(ENCODER_SRCDIR)/./core/asm/mc_chroma.o: $(ENCODER_SRCDIR)/./core/asm/mc_chroma.asm
|
|
||||||
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(ENCODER_ASMFLAGS) $(ENCODER_ASM_INCLUDES) -o $(ENCODER_SRCDIR)/./core/asm/mc_chroma.o $(ENCODER_SRCDIR)/./core/asm/mc_chroma.asm
|
|
||||||
|
|
||||||
$(ENCODER_SRCDIR)/./core/asm/mc_luma.o: $(ENCODER_SRCDIR)/./core/asm/mc_luma.asm
|
|
||||||
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(ENCODER_ASMFLAGS) $(ENCODER_ASM_INCLUDES) -o $(ENCODER_SRCDIR)/./core/asm/mc_luma.o $(ENCODER_SRCDIR)/./core/asm/mc_luma.asm
|
|
||||||
|
|
||||||
$(ENCODER_SRCDIR)/./core/asm/memzero.o: $(ENCODER_SRCDIR)/./core/asm/memzero.asm
|
$(ENCODER_SRCDIR)/./core/asm/memzero.o: $(ENCODER_SRCDIR)/./core/asm/memzero.asm
|
||||||
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(ENCODER_ASMFLAGS) $(ENCODER_ASM_INCLUDES) -o $(ENCODER_SRCDIR)/./core/asm/memzero.o $(ENCODER_SRCDIR)/./core/asm/memzero.asm
|
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(ENCODER_ASMFLAGS) $(ENCODER_ASM_INCLUDES) -o $(ENCODER_SRCDIR)/./core/asm/memzero.o $(ENCODER_SRCDIR)/./core/asm/memzero.asm
|
||||||
|
|
||||||
@@ -203,9 +170,6 @@ $(ENCODER_SRCDIR)/./core/asm/satd_sad.o: $(ENCODER_SRCDIR)/./core/asm/satd_sad.a
|
|||||||
$(ENCODER_SRCDIR)/./core/asm/score.o: $(ENCODER_SRCDIR)/./core/asm/score.asm
|
$(ENCODER_SRCDIR)/./core/asm/score.o: $(ENCODER_SRCDIR)/./core/asm/score.asm
|
||||||
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(ENCODER_ASMFLAGS) $(ENCODER_ASM_INCLUDES) -o $(ENCODER_SRCDIR)/./core/asm/score.o $(ENCODER_SRCDIR)/./core/asm/score.asm
|
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(ENCODER_ASMFLAGS) $(ENCODER_ASM_INCLUDES) -o $(ENCODER_SRCDIR)/./core/asm/score.o $(ENCODER_SRCDIR)/./core/asm/score.asm
|
||||||
|
|
||||||
$(ENCODER_SRCDIR)/./core/asm/vaa.o: $(ENCODER_SRCDIR)/./core/asm/vaa.asm
|
|
||||||
$(ASM) $(ASMFLAGS) $(ASM_INCLUDES) $(ENCODER_ASMFLAGS) $(ENCODER_ASM_INCLUDES) -o $(ENCODER_SRCDIR)/./core/asm/vaa.o $(ENCODER_SRCDIR)/./core/asm/vaa.asm
|
|
||||||
|
|
||||||
$(LIBPREFIX)encoder.$(LIBSUFFIX): $(ENCODER_OBJS)
|
$(LIBPREFIX)encoder.$(LIBSUFFIX): $(ENCODER_OBJS)
|
||||||
rm -f $(LIBPREFIX)encoder.$(LIBSUFFIX)
|
rm -f $(LIBPREFIX)encoder.$(LIBSUFFIX)
|
||||||
$(AR) cr $@ $(ENCODER_OBJS)
|
$(AR) cr $@ $(ENCODER_OBJS)
|
||||||
|
|||||||
@@ -106,6 +106,91 @@
|
|||||||
CommandLine=""
|
CommandLine=""
|
||||||
/>
|
/>
|
||||||
</Configuration>
|
</Configuration>
|
||||||
|
<Configuration
|
||||||
|
Name="Debug|x64"
|
||||||
|
OutputDirectory=".\..\..\..\..\bin\win64\Debug"
|
||||||
|
IntermediateDirectory=".\..\..\..\obj\vp\Debug"
|
||||||
|
ConfigurationType="2"
|
||||||
|
CharacterSet="1"
|
||||||
|
WholeProgramOptimization="0"
|
||||||
|
>
|
||||||
|
<Tool
|
||||||
|
Name="VCPreBuildEventTool"
|
||||||
|
/>
|
||||||
|
<Tool
|
||||||
|
Name="VCCustomBuildTool"
|
||||||
|
CommandLine=""
|
||||||
|
/>
|
||||||
|
<Tool
|
||||||
|
Name="MASM"
|
||||||
|
/>
|
||||||
|
<Tool
|
||||||
|
Name="VCXMLDataGeneratorTool"
|
||||||
|
/>
|
||||||
|
<Tool
|
||||||
|
Name="VCWebServiceProxyGeneratorTool"
|
||||||
|
/>
|
||||||
|
<Tool
|
||||||
|
Name="VCMIDLTool"
|
||||||
|
TargetEnvironment="3"
|
||||||
|
/>
|
||||||
|
<Tool
|
||||||
|
Name="VCCLCompilerTool"
|
||||||
|
Optimization="0"
|
||||||
|
AdditionalIncludeDirectories=""
|
||||||
|
PreprocessorDefinitions="WIN64;_DEBUG;_WINDOWS;_USRDLL;WELSVP_EXPORTS;X86_ASM"
|
||||||
|
MinimalRebuild="true"
|
||||||
|
BasicRuntimeChecks="3"
|
||||||
|
RuntimeLibrary="1"
|
||||||
|
UsePrecompiledHeader="0"
|
||||||
|
AssemblerListingLocation=""
|
||||||
|
WarningLevel="3"
|
||||||
|
DebugInformationFormat="3"
|
||||||
|
/>
|
||||||
|
<Tool
|
||||||
|
Name="VCManagedResourceCompilerTool"
|
||||||
|
/>
|
||||||
|
<Tool
|
||||||
|
Name="VCResourceCompilerTool"
|
||||||
|
/>
|
||||||
|
<Tool
|
||||||
|
Name="VCPreLinkEventTool"
|
||||||
|
/>
|
||||||
|
<Tool
|
||||||
|
Name="VCLinkerTool"
|
||||||
|
LinkLibraryDependencies="true"
|
||||||
|
OutputFile="$(OutDir)\welsvp.dll"
|
||||||
|
LinkIncremental="2"
|
||||||
|
ModuleDefinitionFile="../../src/common/WelsVP.def"
|
||||||
|
GenerateDebugInformation="true"
|
||||||
|
GenerateMapFile="true"
|
||||||
|
MapFileName="$(OutDir)\welsvp.map"
|
||||||
|
SubSystem="2"
|
||||||
|
TargetMachine="17"
|
||||||
|
/>
|
||||||
|
<Tool
|
||||||
|
Name="VCALinkTool"
|
||||||
|
/>
|
||||||
|
<Tool
|
||||||
|
Name="VCManifestTool"
|
||||||
|
/>
|
||||||
|
<Tool
|
||||||
|
Name="VCXDCMakeTool"
|
||||||
|
/>
|
||||||
|
<Tool
|
||||||
|
Name="VCBscMakeTool"
|
||||||
|
/>
|
||||||
|
<Tool
|
||||||
|
Name="VCFxCopTool"
|
||||||
|
/>
|
||||||
|
<Tool
|
||||||
|
Name="VCAppVerifierTool"
|
||||||
|
/>
|
||||||
|
<Tool
|
||||||
|
Name="VCPostBuildEventTool"
|
||||||
|
CommandLine=""
|
||||||
|
/>
|
||||||
|
</Configuration>
|
||||||
<Configuration
|
<Configuration
|
||||||
Name="Release|Win32"
|
Name="Release|Win32"
|
||||||
OutputDirectory=".\..\..\..\bin\win32\Release"
|
OutputDirectory=".\..\..\..\bin\win32\Release"
|
||||||
@@ -194,94 +279,9 @@
|
|||||||
CommandLine=""
|
CommandLine=""
|
||||||
/>
|
/>
|
||||||
</Configuration>
|
</Configuration>
|
||||||
<Configuration
|
|
||||||
Name="Debug|x64"
|
|
||||||
OutputDirectory=".\..\..\..\bin\win32\Debug"
|
|
||||||
IntermediateDirectory=".\..\..\..\obj\vp\Debug"
|
|
||||||
ConfigurationType="2"
|
|
||||||
CharacterSet="1"
|
|
||||||
WholeProgramOptimization="0"
|
|
||||||
>
|
|
||||||
<Tool
|
|
||||||
Name="VCPreBuildEventTool"
|
|
||||||
/>
|
|
||||||
<Tool
|
|
||||||
Name="VCCustomBuildTool"
|
|
||||||
CommandLine=""
|
|
||||||
/>
|
|
||||||
<Tool
|
|
||||||
Name="MASM"
|
|
||||||
/>
|
|
||||||
<Tool
|
|
||||||
Name="VCXMLDataGeneratorTool"
|
|
||||||
/>
|
|
||||||
<Tool
|
|
||||||
Name="VCWebServiceProxyGeneratorTool"
|
|
||||||
/>
|
|
||||||
<Tool
|
|
||||||
Name="VCMIDLTool"
|
|
||||||
TargetEnvironment="3"
|
|
||||||
/>
|
|
||||||
<Tool
|
|
||||||
Name="VCCLCompilerTool"
|
|
||||||
Optimization="0"
|
|
||||||
AdditionalIncludeDirectories=""
|
|
||||||
PreprocessorDefinitions="WIN64;_DEBUG;_WINDOWS;_USRDLL;WELSVP_EXPORTS"
|
|
||||||
MinimalRebuild="true"
|
|
||||||
BasicRuntimeChecks="3"
|
|
||||||
RuntimeLibrary="1"
|
|
||||||
UsePrecompiledHeader="0"
|
|
||||||
AssemblerListingLocation=""
|
|
||||||
WarningLevel="3"
|
|
||||||
DebugInformationFormat="3"
|
|
||||||
/>
|
|
||||||
<Tool
|
|
||||||
Name="VCManagedResourceCompilerTool"
|
|
||||||
/>
|
|
||||||
<Tool
|
|
||||||
Name="VCResourceCompilerTool"
|
|
||||||
/>
|
|
||||||
<Tool
|
|
||||||
Name="VCPreLinkEventTool"
|
|
||||||
/>
|
|
||||||
<Tool
|
|
||||||
Name="VCLinkerTool"
|
|
||||||
LinkLibraryDependencies="true"
|
|
||||||
OutputFile="$(OutDir)\welsvp.dll"
|
|
||||||
LinkIncremental="2"
|
|
||||||
ModuleDefinitionFile="../../src/common/WelsVP.def"
|
|
||||||
GenerateDebugInformation="true"
|
|
||||||
GenerateMapFile="true"
|
|
||||||
MapFileName="$(OutDir)\welsvp.map"
|
|
||||||
SubSystem="2"
|
|
||||||
TargetMachine="17"
|
|
||||||
/>
|
|
||||||
<Tool
|
|
||||||
Name="VCALinkTool"
|
|
||||||
/>
|
|
||||||
<Tool
|
|
||||||
Name="VCManifestTool"
|
|
||||||
/>
|
|
||||||
<Tool
|
|
||||||
Name="VCXDCMakeTool"
|
|
||||||
/>
|
|
||||||
<Tool
|
|
||||||
Name="VCBscMakeTool"
|
|
||||||
/>
|
|
||||||
<Tool
|
|
||||||
Name="VCFxCopTool"
|
|
||||||
/>
|
|
||||||
<Tool
|
|
||||||
Name="VCAppVerifierTool"
|
|
||||||
/>
|
|
||||||
<Tool
|
|
||||||
Name="VCPostBuildEventTool"
|
|
||||||
CommandLine=""
|
|
||||||
/>
|
|
||||||
</Configuration>
|
|
||||||
<Configuration
|
<Configuration
|
||||||
Name="Release|x64"
|
Name="Release|x64"
|
||||||
OutputDirectory=".\..\..\..\bin\win64\Release"
|
OutputDirectory=".\..\..\..\..\bin\win64\Release"
|
||||||
IntermediateDirectory=".\..\..\..\obj\vp\Release"
|
IntermediateDirectory=".\..\..\..\obj\vp\Release"
|
||||||
ConfigurationType="2"
|
ConfigurationType="2"
|
||||||
CharacterSet="1"
|
CharacterSet="1"
|
||||||
@@ -313,7 +313,7 @@
|
|||||||
Optimization="3"
|
Optimization="3"
|
||||||
EnableIntrinsicFunctions="false"
|
EnableIntrinsicFunctions="false"
|
||||||
FavorSizeOrSpeed="1"
|
FavorSizeOrSpeed="1"
|
||||||
PreprocessorDefinitions="WIN64;NDEBUG;_WINDOWS;_USRDLL;WELSVP_EXPORTS"
|
PreprocessorDefinitions="WIN64;NDEBUG;_WINDOWS;_USRDLL;WELSVP_EXPORTS;X86_ASM"
|
||||||
RuntimeLibrary="0"
|
RuntimeLibrary="0"
|
||||||
EnableFunctionLevelLinking="false"
|
EnableFunctionLevelLinking="false"
|
||||||
UsePrecompiledHeader="0"
|
UsePrecompiledHeader="0"
|
||||||
@@ -480,14 +480,23 @@
|
|||||||
Name="ASM"
|
Name="ASM"
|
||||||
>
|
>
|
||||||
<File
|
<File
|
||||||
RelativePath="..\..\src\asm\asm_inc.asm"
|
RelativePath="..\..\..\common\cpuid.asm"
|
||||||
>
|
>
|
||||||
<FileConfiguration
|
<FileConfiguration
|
||||||
Name="Debug|Win32"
|
Name="Debug|Win32"
|
||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
||||||
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
|
/>
|
||||||
|
</FileConfiguration>
|
||||||
|
<FileConfiguration
|
||||||
|
Name="Debug|x64"
|
||||||
|
>
|
||||||
|
<Tool
|
||||||
|
Name="VCCustomBuildTool"
|
||||||
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
@@ -496,69 +505,16 @@
|
|||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
|
||||||
/>
|
|
||||||
</FileConfiguration>
|
|
||||||
<FileConfiguration
|
|
||||||
Name="Debug|x64"
|
|
||||||
ExcludedFromBuild="true"
|
|
||||||
>
|
|
||||||
<Tool
|
|
||||||
Name="VCCustomBuildTool"
|
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
<FileConfiguration
|
<FileConfiguration
|
||||||
Name="Release|x64"
|
Name="Release|x64"
|
||||||
ExcludedFromBuild="true"
|
|
||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
|
||||||
/>
|
|
||||||
</FileConfiguration>
|
|
||||||
</File>
|
|
||||||
<File
|
|
||||||
RelativePath="..\..\src\asm\cpuid.asm"
|
|
||||||
>
|
|
||||||
<FileConfiguration
|
|
||||||
Name="Debug|Win32"
|
|
||||||
>
|
|
||||||
<Tool
|
|
||||||
Name="VCCustomBuildTool"
|
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
|
||||||
/>
|
|
||||||
</FileConfiguration>
|
|
||||||
<FileConfiguration
|
|
||||||
Name="Release|Win32"
|
|
||||||
>
|
|
||||||
<Tool
|
|
||||||
Name="VCCustomBuildTool"
|
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
|
||||||
/>
|
|
||||||
</FileConfiguration>
|
|
||||||
<FileConfiguration
|
|
||||||
Name="Debug|x64"
|
|
||||||
ExcludedFromBuild="true"
|
|
||||||
>
|
|
||||||
<Tool
|
|
||||||
Name="VCCustomBuildTool"
|
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
|
||||||
/>
|
|
||||||
</FileConfiguration>
|
|
||||||
<FileConfiguration
|
|
||||||
Name="Release|x64"
|
|
||||||
ExcludedFromBuild="true"
|
|
||||||
>
|
|
||||||
<Tool
|
|
||||||
Name="VCCustomBuildTool"
|
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
@@ -571,7 +527,16 @@
|
|||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
||||||
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
|
/>
|
||||||
|
</FileConfiguration>
|
||||||
|
<FileConfiguration
|
||||||
|
Name="Debug|x64"
|
||||||
|
>
|
||||||
|
<Tool
|
||||||
|
Name="VCCustomBuildTool"
|
||||||
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
@@ -580,27 +545,16 @@
|
|||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
|
||||||
/>
|
|
||||||
</FileConfiguration>
|
|
||||||
<FileConfiguration
|
|
||||||
Name="Debug|x64"
|
|
||||||
ExcludedFromBuild="true"
|
|
||||||
>
|
|
||||||
<Tool
|
|
||||||
Name="VCCustomBuildTool"
|
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
<FileConfiguration
|
<FileConfiguration
|
||||||
Name="Release|x64"
|
Name="Release|x64"
|
||||||
ExcludedFromBuild="true"
|
|
||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
@@ -613,7 +567,16 @@
|
|||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
||||||
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
|
/>
|
||||||
|
</FileConfiguration>
|
||||||
|
<FileConfiguration
|
||||||
|
Name="Debug|x64"
|
||||||
|
>
|
||||||
|
<Tool
|
||||||
|
Name="VCCustomBuildTool"
|
||||||
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
@@ -622,27 +585,16 @@
|
|||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
|
||||||
/>
|
|
||||||
</FileConfiguration>
|
|
||||||
<FileConfiguration
|
|
||||||
Name="Debug|x64"
|
|
||||||
ExcludedFromBuild="true"
|
|
||||||
>
|
|
||||||
<Tool
|
|
||||||
Name="VCCustomBuildTool"
|
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
<FileConfiguration
|
<FileConfiguration
|
||||||
Name="Release|x64"
|
Name="Release|x64"
|
||||||
ExcludedFromBuild="true"
|
|
||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
@@ -655,7 +607,16 @@
|
|||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
|
/>
|
||||||
|
</FileConfiguration>
|
||||||
|
<FileConfiguration
|
||||||
|
Name="Debug|x64"
|
||||||
|
>
|
||||||
|
<Tool
|
||||||
|
Name="VCCustomBuildTool"
|
||||||
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
@@ -664,27 +625,16 @@
|
|||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
|
||||||
/>
|
|
||||||
</FileConfiguration>
|
|
||||||
<FileConfiguration
|
|
||||||
Name="Debug|x64"
|
|
||||||
ExcludedFromBuild="true"
|
|
||||||
>
|
|
||||||
<Tool
|
|
||||||
Name="VCCustomBuildTool"
|
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
<FileConfiguration
|
<FileConfiguration
|
||||||
Name="Release|x64"
|
Name="Release|x64"
|
||||||
ExcludedFromBuild="true"
|
|
||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
@@ -697,7 +647,16 @@
|
|||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
|
/>
|
||||||
|
</FileConfiguration>
|
||||||
|
<FileConfiguration
|
||||||
|
Name="Debug|x64"
|
||||||
|
>
|
||||||
|
<Tool
|
||||||
|
Name="VCCustomBuildTool"
|
||||||
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
@@ -706,27 +665,16 @@
|
|||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
|
||||||
/>
|
|
||||||
</FileConfiguration>
|
|
||||||
<FileConfiguration
|
|
||||||
Name="Debug|x64"
|
|
||||||
ExcludedFromBuild="true"
|
|
||||||
>
|
|
||||||
<Tool
|
|
||||||
Name="VCCustomBuildTool"
|
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
<FileConfiguration
|
<FileConfiguration
|
||||||
Name="Release|x64"
|
Name="Release|x64"
|
||||||
ExcludedFromBuild="true"
|
|
||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
@@ -739,7 +687,16 @@
|
|||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
||||||
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
|
/>
|
||||||
|
</FileConfiguration>
|
||||||
|
<FileConfiguration
|
||||||
|
Name="Debug|x64"
|
||||||
|
>
|
||||||
|
<Tool
|
||||||
|
Name="VCCustomBuildTool"
|
||||||
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
@@ -748,27 +705,16 @@
|
|||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
|
||||||
/>
|
|
||||||
</FileConfiguration>
|
|
||||||
<FileConfiguration
|
|
||||||
Name="Debug|x64"
|
|
||||||
ExcludedFromBuild="true"
|
|
||||||
>
|
|
||||||
<Tool
|
|
||||||
Name="VCCustomBuildTool"
|
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
<FileConfiguration
|
<FileConfiguration
|
||||||
Name="Release|x64"
|
Name="Release|x64"
|
||||||
ExcludedFromBuild="true"
|
|
||||||
>
|
>
|
||||||
<Tool
|
<Tool
|
||||||
Name="VCCustomBuildTool"
|
Name="VCCustomBuildTool"
|
||||||
CommandLine="nasm -I$(InputDir) -f win32 -DPREFIX -o $(IntDir)\$(InputName).obj $(InputPath)
"
|
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||||
Outputs="$(IntDir)\$(InputName).obj"
|
Outputs="$(IntDir)\$(InputName).obj"
|
||||||
/>
|
/>
|
||||||
</FileConfiguration>
|
</FileConfiguration>
|
||||||
@@ -63,16 +63,16 @@
|
|||||||
<PropertyGroup Label="UserMacros" />
|
<PropertyGroup Label="UserMacros" />
|
||||||
<PropertyGroup>
|
<PropertyGroup>
|
||||||
<_ProjectFileVersion>10.0.40219.1</_ProjectFileVersion>
|
<_ProjectFileVersion>10.0.40219.1</_ProjectFileVersion>
|
||||||
<OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">.\..\..\..\bin\win32\Debug\</OutDir>
|
<OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\..\bin\win32\Debug\</OutDir>
|
||||||
<OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">.\..\..\..\bin\win64\Debug\</OutDir>
|
<OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\..\bin\win64\Debug\</OutDir>
|
||||||
<IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">.\..\..\..\obj\vp\Debug\</IntDir>
|
<IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">..\..\..\..\obj\vp\Debug\</IntDir>
|
||||||
<IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">.\..\..\..\obj\vp\Debug\</IntDir>
|
<IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\..\..\obj\vp\Debug\</IntDir>
|
||||||
<LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
|
<LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
|
||||||
<LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
|
<LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</LinkIncremental>
|
||||||
<OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">.\..\..\..\bin\win32\Release\</OutDir>
|
<OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\..\bin\win32\Release\</OutDir>
|
||||||
<OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">.\..\..\..\bin\win64\Release\</OutDir>
|
<OutDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\..\bin\win64\Release\</OutDir>
|
||||||
<IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">.\..\..\..\obj\vp\Release\</IntDir>
|
<IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">..\..\..\..\obj\vp\Release\</IntDir>
|
||||||
<IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">.\..\..\..\obj\vp\Release\</IntDir>
|
<IntDir Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\..\..\obj\vp\Release\</IntDir>
|
||||||
<LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
|
<LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
|
||||||
<LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
|
<LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</LinkIncremental>
|
||||||
<GenerateManifest Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</GenerateManifest>
|
<GenerateManifest Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</GenerateManifest>
|
||||||
@@ -144,7 +144,7 @@
|
|||||||
<ClCompile>
|
<ClCompile>
|
||||||
<Optimization>Disabled</Optimization>
|
<Optimization>Disabled</Optimization>
|
||||||
<AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
<AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||||
<PreprocessorDefinitions>WIN64;_DEBUG;_WINDOWS;_USRDLL;WELSVP_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
<PreprocessorDefinitions>WIN64;_DEBUG;X86_ASM;_WINDOWS;_USRDLL;WELSVP_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||||
<PrecompiledHeader>
|
<PrecompiledHeader>
|
||||||
@@ -235,7 +235,7 @@
|
|||||||
<Optimization>Full</Optimization>
|
<Optimization>Full</Optimization>
|
||||||
<IntrinsicFunctions>false</IntrinsicFunctions>
|
<IntrinsicFunctions>false</IntrinsicFunctions>
|
||||||
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
|
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
|
||||||
<PreprocessorDefinitions>WIN64;NDEBUG;_WINDOWS;_USRDLL;WELSVP_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
<PreprocessorDefinitions>WIN64;NDEBUG;X86_ASM;_WINDOWS;_USRDLL;WELSVP_EXPORTS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||||
<FunctionLevelLinking>false</FunctionLevelLinking>
|
<FunctionLevelLinking>false</FunctionLevelLinking>
|
||||||
<PrecompiledHeader>
|
<PrecompiledHeader>
|
||||||
@@ -315,117 +315,68 @@
|
|||||||
<ResourceCompile Include="..\..\src\common\WelsVP.rc" />
|
<ResourceCompile Include="..\..\src\common\WelsVP.rc" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<CustomBuild Include="..\..\src\asm\asm_inc.asm">
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
|
||||||
</CustomBuild>
|
|
||||||
<CustomBuild Include="..\..\src\asm\cpuid.asm">
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
|
||||||
</CustomBuild>
|
|
||||||
<CustomBuild Include="..\..\src\asm\denoisefilter.asm">
|
<CustomBuild Include="..\..\src\asm\denoisefilter.asm">
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
<CustomBuild Include="..\..\src\asm\downsample_bilinear.asm">
|
<CustomBuild Include="..\..\src\asm\downsample_bilinear.asm">
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
<CustomBuild Include="..\..\src\asm\intra_pred.asm">
|
<CustomBuild Include="..\..\src\asm\intra_pred.asm">
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
<CustomBuild Include="..\..\src\asm\sad.asm">
|
<CustomBuild Include="..\..\src\asm\sad.asm">
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
<CustomBuild Include="..\..\src\asm\vaa.asm">
|
<CustomBuild Include="..\..\src\asm\vaa.asm">
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I%(RootDir)%(Directory) -f win32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)
|
|
||||||
</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
</CustomBuild>
|
||||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<CustomBuild Include="..\..\..\common\cpuid.asm">
|
||||||
|
<FileType>Document</FileType>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win32 -DX86_32 -DPREFIX -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -I ..\..\..\common\ -I%(RootDir)%(Directory) -f win64 -DWIN64 -o $(IntDir)%(Filename).obj %(FullPath)</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)%(Filename).obj;%(Outputs)</Outputs>
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
@@ -116,12 +116,6 @@
|
|||||||
</ClInclude>
|
</ClInclude>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<CustomBuild Include="..\..\src\asm\asm_inc.asm">
|
|
||||||
<Filter>ASM</Filter>
|
|
||||||
</CustomBuild>
|
|
||||||
<CustomBuild Include="..\..\src\asm\cpuid.asm">
|
|
||||||
<Filter>ASM</Filter>
|
|
||||||
</CustomBuild>
|
|
||||||
<CustomBuild Include="..\..\src\asm\denoisefilter.asm">
|
<CustomBuild Include="..\..\src\asm\denoisefilter.asm">
|
||||||
<Filter>ASM</Filter>
|
<Filter>ASM</Filter>
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
@@ -137,6 +131,9 @@
|
|||||||
<CustomBuild Include="..\..\src\asm\vaa.asm">
|
<CustomBuild Include="..\..\src\asm\vaa.asm">
|
||||||
<Filter>ASM</Filter>
|
<Filter>ASM</Filter>
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
|
<CustomBuild Include="..\..\..\common\cpuid.asm">
|
||||||
|
<Filter>ASM</Filter>
|
||||||
|
</CustomBuild>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<Filter Include="ASM">
|
<Filter Include="ASM">
|
||||||
@@ -223,7 +223,7 @@ void CAdaptiveQuantization::WelsInitVarFunc (PVarFunc& pfVar, int32_t iCpuFlag)
|
|||||||
|
|
||||||
#ifdef X86_ASM
|
#ifdef X86_ASM
|
||||||
if (iCpuFlag & WELS_CPU_SSE2) {
|
if (iCpuFlag & WELS_CPU_SSE2) {
|
||||||
pfVar = SampleVariance16x16_sse2;
|
// pfVar = SampleVariance16x16_sse2;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
@@ -50,7 +50,7 @@ sse2_32 times 8 dw 32
|
|||||||
sse2_20 times 8 dw 20
|
sse2_20 times 8 dw 20
|
||||||
|
|
||||||
|
|
||||||
BITS 32
|
|
||||||
;***********************************************************************
|
;***********************************************************************
|
||||||
; Code
|
; Code
|
||||||
;***********************************************************************
|
;***********************************************************************
|
||||||
@@ -173,35 +173,40 @@ WELS_EXTERN BilateralLumaFilter8_sse2
|
|||||||
; 6 7 8
|
; 6 7 8
|
||||||
; 0: the center point
|
; 0: the center point
|
||||||
%define pushsize 4
|
%define pushsize 4
|
||||||
%define pixel esp + pushsize + 4
|
;%define pixel esp + pushsize + 4
|
||||||
%define stride esp + pushsize + 8
|
;%define stride esp + pushsize + 8
|
||||||
|
;%define pixel r0
|
||||||
|
;%define stride r1
|
||||||
|
|
||||||
BilateralLumaFilter8_sse2:
|
BilateralLumaFilter8_sse2:
|
||||||
push ebx
|
|
||||||
|
push r3
|
||||||
|
%assign push_num 1
|
||||||
|
LOAD_2_PARA
|
||||||
|
|
||||||
pxor xmm7, xmm7
|
pxor xmm7, xmm7
|
||||||
mov eax, [pixel]
|
|
||||||
mov ebx, eax
|
mov r3, r0
|
||||||
movq xmm6, [eax]
|
|
||||||
|
movq xmm6, [r0]
|
||||||
punpcklbw xmm6, xmm7
|
punpcklbw xmm6, xmm7
|
||||||
movdqa xmm3, [sse2_32]
|
movdqa xmm3, [sse2_32]
|
||||||
pxor xmm4, xmm4 ; nTotWeight
|
pxor xmm4, xmm4 ; nTotWeight
|
||||||
pxor xmm5, xmm5 ; nSum
|
pxor xmm5, xmm5 ; nSum
|
||||||
|
|
||||||
dec eax
|
dec r0
|
||||||
mov ecx, [stride]
|
WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [r0] ; pixel 4
|
||||||
|
WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [r0 + 2] ; pixel 5
|
||||||
|
|
||||||
WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [eax] ; pixel 4
|
sub r0, r1
|
||||||
WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [eax + 2] ; pixel 5
|
WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [r0] ; pixel 1
|
||||||
|
WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [r0 + 1] ; pixel 2
|
||||||
|
WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [r0 + 2] ; pixel 3
|
||||||
|
|
||||||
sub eax, ecx
|
lea r0, [r0 + r1 * 2]
|
||||||
WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [eax] ; pixel 1
|
WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [r0] ; pixel 6
|
||||||
WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [eax + 1] ; pixel 2
|
WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [r0 + 1] ; pixel 7
|
||||||
WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [eax + 2] ; pixel 3
|
WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [r0 + 2] ; pixel 8
|
||||||
|
|
||||||
lea eax, [eax + ecx * 2]
|
|
||||||
WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [eax] ; pixel 6
|
|
||||||
WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [eax + 1] ; pixel 7
|
|
||||||
WEIGHT_LINE xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm0, [eax + 2] ; pixel 8
|
|
||||||
|
|
||||||
pcmpeqw xmm0, xmm0
|
pcmpeqw xmm0, xmm0
|
||||||
psrlw xmm0, 15
|
psrlw xmm0, 15
|
||||||
@@ -211,9 +216,12 @@ BilateralLumaFilter8_sse2:
|
|||||||
paddusw xmm5, xmm0
|
paddusw xmm5, xmm0
|
||||||
psrlw xmm5, 8
|
psrlw xmm5, 8
|
||||||
packuswb xmm5, xmm5
|
packuswb xmm5, xmm5
|
||||||
movq [ebx], xmm5
|
movq [r3], xmm5
|
||||||
|
|
||||||
|
|
||||||
|
pop r3
|
||||||
|
%assign push_num 0
|
||||||
|
|
||||||
pop ebx
|
|
||||||
ret
|
ret
|
||||||
|
|
||||||
WELS_EXTERN WaverageChromaFilter8_sse2
|
WELS_EXTERN WaverageChromaFilter8_sse2
|
||||||
@@ -229,35 +237,43 @@ WELS_EXTERN WaverageChromaFilter8_sse2
|
|||||||
|
|
||||||
ALIGN 16
|
ALIGN 16
|
||||||
WaverageChromaFilter8_sse2:
|
WaverageChromaFilter8_sse2:
|
||||||
mov edx, [esp + 4] ; pixels
|
|
||||||
mov ecx, [esp + 8] ; stride
|
|
||||||
|
|
||||||
mov eax, ecx
|
push r3
|
||||||
add eax, eax
|
|
||||||
sub edx, eax ; pixels - 2 * stride
|
%assign push_num 1
|
||||||
sub edx, 2
|
|
||||||
|
LOAD_2_PARA
|
||||||
|
|
||||||
|
mov r3, r1
|
||||||
|
add r3, r3
|
||||||
|
sub r0, r3 ; pixels - 2 * stride
|
||||||
|
sub r0, 2
|
||||||
|
|
||||||
pxor xmm0, xmm0
|
pxor xmm0, xmm0
|
||||||
pxor xmm3, xmm3
|
pxor xmm3, xmm3
|
||||||
|
|
||||||
movdqu xmm1, [edx]
|
movdqu xmm1, [r0]
|
||||||
WEIGHT_LINE1_UV xmm1, xmm2, xmm3, xmm0
|
WEIGHT_LINE1_UV xmm1, xmm2, xmm3, xmm0
|
||||||
|
|
||||||
movdqu xmm1, [edx + ecx]
|
movdqu xmm1, [r0 + r1]
|
||||||
WEIGHT_LINE2_UV xmm1, xmm2, xmm3, xmm0
|
WEIGHT_LINE2_UV xmm1, xmm2, xmm3, xmm0
|
||||||
|
|
||||||
add edx, eax
|
add r0, r3
|
||||||
movdqu xmm1, [edx]
|
movdqu xmm1, [r0]
|
||||||
WEIGHT_LINE3_UV xmm1, xmm2, xmm3, xmm0
|
WEIGHT_LINE3_UV xmm1, xmm2, xmm3, xmm0
|
||||||
|
|
||||||
movdqu xmm1, [edx + ecx]
|
movdqu xmm1, [r0 + r1]
|
||||||
WEIGHT_LINE2_UV xmm1, xmm2, xmm3, xmm0
|
WEIGHT_LINE2_UV xmm1, xmm2, xmm3, xmm0
|
||||||
|
|
||||||
movdqu xmm1, [edx + ecx * 2]
|
movdqu xmm1, [r0 + r1 * 2]
|
||||||
WEIGHT_LINE1_UV xmm1, xmm2, xmm3, xmm0
|
WEIGHT_LINE1_UV xmm1, xmm2, xmm3, xmm0
|
||||||
|
|
||||||
psrlw xmm3, 6
|
psrlw xmm3, 6
|
||||||
packuswb xmm3, xmm3
|
packuswb xmm3, xmm3
|
||||||
movq [edx + 2], xmm3
|
movq [r0 + 2], xmm3
|
||||||
|
|
||||||
|
|
||||||
|
pop r3
|
||||||
|
|
||||||
|
%assign push_num 0
|
||||||
ret
|
ret
|
||||||
@@ -39,8 +39,7 @@
|
|||||||
;*
|
;*
|
||||||
;*************************************************************************/
|
;*************************************************************************/
|
||||||
%include "asm_inc.asm"
|
%include "asm_inc.asm"
|
||||||
BITS 32
|
%ifdef X86_32
|
||||||
|
|
||||||
;***********************************************************************
|
;***********************************************************************
|
||||||
; Macros and other preprocessor constants
|
; Macros and other preprocessor constants
|
||||||
;***********************************************************************
|
;***********************************************************************
|
||||||
@@ -1223,3 +1222,4 @@ FAST_LAST_ROW_END:
|
|||||||
%undef xInverse
|
%undef xInverse
|
||||||
%undef dstStep
|
%undef dstStep
|
||||||
ret
|
ret
|
||||||
|
%endif
|
||||||
1505
codec/processing/src/asm/intra_pred.asm
Normal file
1505
codec/processing/src/asm/intra_pred.asm
Normal file
File diff suppressed because it is too large
Load Diff
@@ -29,7 +29,7 @@
|
|||||||
;* POSSIBILITY OF SUCH DAMAGE.
|
;* POSSIBILITY OF SUCH DAMAGE.
|
||||||
;*
|
;*
|
||||||
;*
|
;*
|
||||||
;* pixel_sse2.asm
|
;* sad.asm
|
||||||
;*
|
;*
|
||||||
;* Abstract
|
;* Abstract
|
||||||
;* WelsSampleSad8x8_sse21
|
;* WelsSampleSad8x8_sse21
|
||||||
@@ -42,50 +42,26 @@
|
|||||||
|
|
||||||
%include "asm_inc.asm"
|
%include "asm_inc.asm"
|
||||||
|
|
||||||
BITS 32
|
|
||||||
|
|
||||||
;***********************************************************************
|
;***********************************************************************
|
||||||
; Macros and other preprocessor constants
|
; Macros and other preprocessor constants
|
||||||
;***********************************************************************
|
;***********************************************************************
|
||||||
|
|
||||||
%macro SAD_8x4 0
|
|
||||||
movq xmm0, [eax]
|
|
||||||
movq xmm1, [eax+ebx]
|
|
||||||
lea eax, [eax+2*ebx]
|
|
||||||
movhps xmm0, [eax]
|
|
||||||
movhps xmm1, [eax+ebx]
|
|
||||||
|
|
||||||
movq xmm2, [ecx]
|
|
||||||
movq xmm3, [ecx+edx]
|
|
||||||
lea ecx, [ecx+2*edx]
|
|
||||||
movhps xmm2, [ecx]
|
|
||||||
movhps xmm3, [ecx+edx]
|
|
||||||
psadbw xmm0, xmm2
|
|
||||||
psadbw xmm1, xmm3
|
|
||||||
paddw xmm6, xmm0
|
|
||||||
paddw xmm6, xmm1
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
%macro CACHE_SPLIT_CHECK 3 ; address, width, cacheline
|
%macro CACHE_SPLIT_CHECK 3 ; address, width, cacheline
|
||||||
and %1, 0x1f|(%3>>1)
|
and %1, 0x1f|(%3>>1)
|
||||||
cmp %1, (32-%2)|(%3>>1)
|
cmp %1, (32-%2)|(%3>>1)
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
|
|
||||||
%macro SSE2_GetSad8x4 0
|
%macro SSE2_GetSad8x4 0
|
||||||
movq xmm0, [eax]
|
movq xmm0, [r0]
|
||||||
movq xmm1, [eax+ebx]
|
movq xmm1, [r0+r1]
|
||||||
lea eax, [eax+2*ebx]
|
lea r0, [r0+2*r1]
|
||||||
movhps xmm0, [eax]
|
movhps xmm0, [r0]
|
||||||
movhps xmm1, [eax+ebx]
|
movhps xmm1, [r0+r1]
|
||||||
|
|
||||||
movq xmm2, [ecx]
|
movq xmm2, [r2]
|
||||||
movq xmm3, [ecx+edx]
|
movq xmm3, [r2+r3]
|
||||||
lea ecx, [ecx+2*edx]
|
lea r2, [r2+2*r3]
|
||||||
movhps xmm2, [ecx]
|
movhps xmm2, [r2]
|
||||||
movhps xmm3, [ecx+edx]
|
movhps xmm3, [r2+r3]
|
||||||
psadbw xmm0, xmm2
|
psadbw xmm0, xmm2
|
||||||
psadbw xmm1, xmm3
|
psadbw xmm1, xmm3
|
||||||
paddw xmm6, xmm0
|
paddw xmm6, xmm0
|
||||||
@@ -100,38 +76,55 @@ SECTION .text
|
|||||||
|
|
||||||
WELS_EXTERN WelsSampleSad8x8_sse21
|
WELS_EXTERN WelsSampleSad8x8_sse21
|
||||||
WelsSampleSad8x8_sse21:
|
WelsSampleSad8x8_sse21:
|
||||||
mov ecx, [esp+12]
|
;mov ecx, [esp+12]
|
||||||
mov edx, ecx
|
;mov edx, ecx
|
||||||
CACHE_SPLIT_CHECK edx, 8, 64
|
;CACHE_SPLIT_CHECK edx, 8, 64
|
||||||
jle near .pixel_sad_8x8_nsplit
|
;jle near .pixel_sad_8x8_nsplit
|
||||||
push ebx
|
;push ebx
|
||||||
push edi
|
;push edi
|
||||||
mov eax, [esp+12]
|
;mov eax, [esp+12]
|
||||||
mov ebx, [esp+16]
|
;mov ebx, [esp+16]
|
||||||
|
|
||||||
|
%assign push_num 0
|
||||||
|
mov r2, arg3
|
||||||
|
push r2
|
||||||
|
CACHE_SPLIT_CHECK r2, 8, 64
|
||||||
|
jle near .pixel_sad_8x8_nsplit
|
||||||
|
pop r2
|
||||||
|
%ifdef X86_32
|
||||||
|
push r3
|
||||||
|
push r4
|
||||||
|
push r5
|
||||||
|
%endif
|
||||||
|
%assign push_num 3
|
||||||
|
mov r0, arg1
|
||||||
|
mov r1, arg2
|
||||||
|
SIGN_EXTENTION r1, r1d
|
||||||
pxor xmm7, xmm7
|
pxor xmm7, xmm7
|
||||||
|
|
||||||
mov edi, ecx
|
;ecx r2, edx r4, edi r5
|
||||||
and edi, 0x07
|
|
||||||
sub ecx, edi
|
|
||||||
mov edx, 8
|
|
||||||
sub edx, edi
|
|
||||||
|
|
||||||
shl edi, 3
|
mov r5, r2
|
||||||
shl edx, 3
|
and r5, 0x07
|
||||||
movd xmm5, edi
|
sub r2, r5
|
||||||
movd xmm6, edx
|
mov r4, 8
|
||||||
mov edi, 8
|
sub r4, r5
|
||||||
add edi, ecx
|
|
||||||
mov edx, [esp+24]
|
|
||||||
|
|
||||||
movq xmm0, [eax]
|
shl r5, 3
|
||||||
movhps xmm0, [eax+ebx]
|
shl r4, 3
|
||||||
|
movd xmm5, r5d
|
||||||
|
movd xmm6, r4d
|
||||||
|
mov r5, 8
|
||||||
|
add r5, r2
|
||||||
|
mov r3, arg4
|
||||||
|
SIGN_EXTENTION r3, r3d
|
||||||
|
movq xmm0, [r0]
|
||||||
|
movhps xmm0, [r0+r1]
|
||||||
|
|
||||||
movq xmm1, [ecx]
|
movq xmm1, [r2]
|
||||||
movq xmm2, [edi]
|
movq xmm2, [r5]
|
||||||
movhps xmm1, [ecx+edx]
|
movhps xmm1, [r2+r3]
|
||||||
movhps xmm2, [edi+edx]
|
movhps xmm2, [r5+r3]
|
||||||
psrlq xmm1, xmm5
|
psrlq xmm1, xmm5
|
||||||
psllq xmm2, xmm6
|
psllq xmm2, xmm6
|
||||||
por xmm1, xmm2
|
por xmm1, xmm2
|
||||||
@@ -139,17 +132,17 @@ WelsSampleSad8x8_sse21:
|
|||||||
psadbw xmm0, xmm1
|
psadbw xmm0, xmm1
|
||||||
paddw xmm7, xmm0
|
paddw xmm7, xmm0
|
||||||
|
|
||||||
lea eax, [eax+2*ebx]
|
lea r0, [r0+2*r1]
|
||||||
lea ecx, [ecx+2*edx]
|
lea r2, [r2+2*r3]
|
||||||
lea edi, [edi+2*edx]
|
lea r5, [r5+2*r3]
|
||||||
|
|
||||||
movq xmm0, [eax]
|
movq xmm0, [r0]
|
||||||
movhps xmm0, [eax+ebx]
|
movhps xmm0, [r0+r1]
|
||||||
|
|
||||||
movq xmm1, [ecx]
|
movq xmm1, [r2]
|
||||||
movq xmm2, [edi]
|
movq xmm2, [r5]
|
||||||
movhps xmm1, [ecx+edx]
|
movhps xmm1, [r2+r3]
|
||||||
movhps xmm2, [edi+edx]
|
movhps xmm2, [r5+r3]
|
||||||
psrlq xmm1, xmm5
|
psrlq xmm1, xmm5
|
||||||
psllq xmm2, xmm6
|
psllq xmm2, xmm6
|
||||||
por xmm1, xmm2
|
por xmm1, xmm2
|
||||||
@@ -157,17 +150,17 @@ WelsSampleSad8x8_sse21:
|
|||||||
psadbw xmm0, xmm1
|
psadbw xmm0, xmm1
|
||||||
paddw xmm7, xmm0
|
paddw xmm7, xmm0
|
||||||
|
|
||||||
lea eax, [eax+2*ebx]
|
lea r0, [r0+2*r1]
|
||||||
lea ecx, [ecx+2*edx]
|
lea r2, [r2+2*r3]
|
||||||
lea edi, [edi+2*edx]
|
lea r5, [r5+2*r3]
|
||||||
|
|
||||||
movq xmm0, [eax]
|
movq xmm0, [r0]
|
||||||
movhps xmm0, [eax+ebx]
|
movhps xmm0, [r0+r1]
|
||||||
|
|
||||||
movq xmm1, [ecx]
|
movq xmm1, [r2]
|
||||||
movq xmm2, [edi]
|
movq xmm2, [r5]
|
||||||
movhps xmm1, [ecx+edx]
|
movhps xmm1, [r2+r3]
|
||||||
movhps xmm2, [edi+edx]
|
movhps xmm2, [r5+r3]
|
||||||
psrlq xmm1, xmm5
|
psrlq xmm1, xmm5
|
||||||
psllq xmm2, xmm6
|
psllq xmm2, xmm6
|
||||||
por xmm1, xmm2
|
por xmm1, xmm2
|
||||||
@@ -175,17 +168,17 @@ WelsSampleSad8x8_sse21:
|
|||||||
psadbw xmm0, xmm1
|
psadbw xmm0, xmm1
|
||||||
paddw xmm7, xmm0
|
paddw xmm7, xmm0
|
||||||
|
|
||||||
lea eax, [eax+2*ebx]
|
lea r0, [r0+2*r1]
|
||||||
lea ecx, [ecx+2*edx]
|
lea r2, [r2+2*r3]
|
||||||
lea edi, [edi+2*edx]
|
lea r5, [r5+2*r3]
|
||||||
|
|
||||||
movq xmm0, [eax]
|
movq xmm0, [r0]
|
||||||
movhps xmm0, [eax+ebx]
|
movhps xmm0, [r0+r1]
|
||||||
|
|
||||||
movq xmm1, [ecx]
|
movq xmm1, [r2]
|
||||||
movq xmm2, [edi]
|
movq xmm2, [r5]
|
||||||
movhps xmm1, [ecx+edx]
|
movhps xmm1, [r2+r3]
|
||||||
movhps xmm2, [edi+edx]
|
movhps xmm2, [r5+r3]
|
||||||
psrlq xmm1, xmm5
|
psrlq xmm1, xmm5
|
||||||
psllq xmm2, xmm6
|
psllq xmm2, xmm6
|
||||||
por xmm1, xmm2
|
por xmm1, xmm2
|
||||||
@@ -195,22 +188,33 @@ WelsSampleSad8x8_sse21:
|
|||||||
|
|
||||||
movhlps xmm0, xmm7
|
movhlps xmm0, xmm7
|
||||||
paddw xmm0, xmm7
|
paddw xmm0, xmm7
|
||||||
movd eax, xmm0
|
movd retrd, xmm0
|
||||||
pop edi
|
%ifdef X86_32
|
||||||
|
pop r5
|
||||||
|
pop r4
|
||||||
|
pop r3
|
||||||
|
%endif
|
||||||
jmp .return
|
jmp .return
|
||||||
|
|
||||||
.pixel_sad_8x8_nsplit:
|
.pixel_sad_8x8_nsplit:
|
||||||
push ebx
|
;push ebx
|
||||||
mov eax, [esp+8]
|
;mov eax, [esp+8]
|
||||||
mov ebx, [esp+12]
|
;mov ebx, [esp+12]
|
||||||
mov edx, [esp+20]
|
;mov edx, [esp+20]
|
||||||
|
|
||||||
|
pop r2
|
||||||
|
%assign push_num 0
|
||||||
|
LOAD_4_PARA
|
||||||
|
SIGN_EXTENTION r1, r1d
|
||||||
|
SIGN_EXTENTION r3, r3d
|
||||||
pxor xmm6, xmm6
|
pxor xmm6, xmm6
|
||||||
SSE2_GetSad8x4
|
SSE2_GetSad8x4
|
||||||
lea eax, [eax+2*ebx]
|
lea r0, [r0+2*r1]
|
||||||
lea ecx, [ecx+2*edx]
|
lea r2, [r2+2*r3]
|
||||||
SSE2_GetSad8x4
|
SSE2_GetSad8x4
|
||||||
movhlps xmm0, xmm6
|
movhlps xmm0, xmm6
|
||||||
paddw xmm0, xmm6
|
paddw xmm0, xmm6
|
||||||
movd eax, xmm0
|
movd retrd, xmm0
|
||||||
|
LOAD_4_PARA_POP
|
||||||
.return:
|
.return:
|
||||||
pop ebx
|
|
||||||
ret
|
ret
|
||||||
@@ -39,8 +39,7 @@
|
|||||||
;*
|
;*
|
||||||
;*************************************************************************/
|
;*************************************************************************/
|
||||||
%include "asm_inc.asm"
|
%include "asm_inc.asm"
|
||||||
BITS 32
|
%ifdef X86_32
|
||||||
|
|
||||||
;***********************************************************************
|
;***********************************************************************
|
||||||
; Macros and other preprocessor constants
|
; Macros and other preprocessor constants
|
||||||
;***********************************************************************
|
;***********************************************************************
|
||||||
@@ -481,181 +480,6 @@ SampleVariance16x16_sse2:
|
|||||||
|
|
||||||
; , 6/7/2010
|
; , 6/7/2010
|
||||||
|
|
||||||
%ifndef NO_DYNAMIC_VP
|
|
||||||
WELS_EXTERN AnalysisVaaInfoIntra_sse2
|
|
||||||
;***********************************************************************
|
|
||||||
; int32_t AnalysisVaaInfoIntra_sse2( uint8_t *pDataY, const int32_t linesize );
|
|
||||||
;***********************************************************************
|
|
||||||
ALIGN 16
|
|
||||||
AnalysisVaaInfoIntra_sse2:
|
|
||||||
push ebx
|
|
||||||
push edx
|
|
||||||
push esi
|
|
||||||
push edi
|
|
||||||
push ebp
|
|
||||||
|
|
||||||
mov ebp, esp
|
|
||||||
and ebp, 0fh
|
|
||||||
sub esp, ebp
|
|
||||||
sub esp, 32
|
|
||||||
%define PUSH_SIZE 52 ; 20 + 32
|
|
||||||
|
|
||||||
mov esi, [esp+ebp+PUSH_SIZE+4] ; data_y
|
|
||||||
mov ecx, [esp+ebp+PUSH_SIZE+8] ; linesize
|
|
||||||
|
|
||||||
mov ebx, ecx
|
|
||||||
sal ebx, $1 ; linesize x 2 [ebx]
|
|
||||||
mov edx, ebx
|
|
||||||
add edx, ecx ; linesize x 3 [edx]
|
|
||||||
mov eax, ebx
|
|
||||||
sal eax, $1 ; linesize x 4 [eax]
|
|
||||||
|
|
||||||
pxor xmm7, xmm7
|
|
||||||
|
|
||||||
; loops
|
|
||||||
VAA_AVG_BLOCK_SSE2 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5
|
|
||||||
movq [esp], xmm0
|
|
||||||
|
|
||||||
lea esi, [esi+eax]
|
|
||||||
VAA_AVG_BLOCK_SSE2 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5
|
|
||||||
movq [esp+8], xmm0
|
|
||||||
|
|
||||||
lea esi, [esi+eax]
|
|
||||||
VAA_AVG_BLOCK_SSE2 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5
|
|
||||||
movq [esp+16], xmm0
|
|
||||||
|
|
||||||
lea esi, [esi+eax]
|
|
||||||
VAA_AVG_BLOCK_SSE2 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5
|
|
||||||
movq [esp+24], xmm0
|
|
||||||
|
|
||||||
movdqa xmm0, [esp] ; block 0~7
|
|
||||||
movdqa xmm1, [esp+16] ; block 8~15
|
|
||||||
movdqa xmm2, xmm0
|
|
||||||
paddw xmm0, xmm1
|
|
||||||
SUM_WORD_8x2_SSE2 xmm0, xmm3
|
|
||||||
|
|
||||||
pmullw xmm1, xmm1
|
|
||||||
pmullw xmm2, xmm2
|
|
||||||
movdqa xmm3, xmm1
|
|
||||||
movdqa xmm4, xmm2
|
|
||||||
punpcklwd xmm1, xmm7
|
|
||||||
punpckhwd xmm3, xmm7
|
|
||||||
punpcklwd xmm2, xmm7
|
|
||||||
punpckhwd xmm4, xmm7
|
|
||||||
paddd xmm1, xmm2
|
|
||||||
paddd xmm3, xmm4
|
|
||||||
paddd xmm1, xmm3
|
|
||||||
pshufd xmm2, xmm1, 01Bh
|
|
||||||
paddd xmm1, xmm2
|
|
||||||
pshufd xmm2, xmm1, 0B1h
|
|
||||||
paddd xmm1, xmm2
|
|
||||||
|
|
||||||
movd ebx, xmm0
|
|
||||||
and ebx, 0ffffh ; effective low word truncated
|
|
||||||
mov ecx, ebx
|
|
||||||
imul ebx, ecx
|
|
||||||
sar ebx, $4
|
|
||||||
movd eax, xmm1
|
|
||||||
sub eax, ebx
|
|
||||||
|
|
||||||
%undef PUSH_SIZE
|
|
||||||
add esp, 32
|
|
||||||
add esp, ebp
|
|
||||||
pop ebp
|
|
||||||
pop edi
|
|
||||||
pop esi
|
|
||||||
pop edx
|
|
||||||
pop ebx
|
|
||||||
ret
|
|
||||||
|
|
||||||
WELS_EXTERN AnalysisVaaInfoIntra_ssse3
|
|
||||||
;***********************************************************************
|
|
||||||
; int32_t AnalysisVaaInfoIntra_ssse3( uint8_t *pDataY, const int32_t linesize );
|
|
||||||
;***********************************************************************
|
|
||||||
ALIGN 16
|
|
||||||
AnalysisVaaInfoIntra_ssse3:
|
|
||||||
push ebx
|
|
||||||
push edx
|
|
||||||
push esi
|
|
||||||
push edi
|
|
||||||
push ebp
|
|
||||||
|
|
||||||
mov ebp, esp
|
|
||||||
and ebp, 0fh
|
|
||||||
sub esp, ebp
|
|
||||||
sub esp, 32
|
|
||||||
%define PUSH_SIZE 52 ; 20 + 32
|
|
||||||
|
|
||||||
mov esi, [esp+ebp+PUSH_SIZE+4] ; data_y
|
|
||||||
mov ecx, [esp+ebp+PUSH_SIZE+8] ; linesize
|
|
||||||
|
|
||||||
mov ebx, ecx
|
|
||||||
sal ebx, $1 ; linesize x 2 [ebx]
|
|
||||||
mov edx, ebx
|
|
||||||
add edx, ecx ; linesize x 3 [edx]
|
|
||||||
mov eax, ebx
|
|
||||||
sal eax, $1 ; linesize x 4 [eax]
|
|
||||||
|
|
||||||
pxor xmm7, xmm7
|
|
||||||
|
|
||||||
; loops
|
|
||||||
VAA_AVG_BLOCK_SSSE3 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5
|
|
||||||
movq [esp], xmm0
|
|
||||||
|
|
||||||
lea esi, [esi+eax]
|
|
||||||
VAA_AVG_BLOCK_SSSE3 xmm1, xmm2, xmm3, xmm4, xmm5, xmm6
|
|
||||||
movq [esp+8], xmm1
|
|
||||||
|
|
||||||
lea esi, [esi+eax]
|
|
||||||
VAA_AVG_BLOCK_SSSE3 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5
|
|
||||||
movq [esp+16], xmm0
|
|
||||||
|
|
||||||
lea esi, [esi+eax]
|
|
||||||
VAA_AVG_BLOCK_SSSE3 xmm1, xmm2, xmm3, xmm4, xmm5, xmm6
|
|
||||||
movq [esp+24], xmm1
|
|
||||||
|
|
||||||
movdqa xmm0, [esp] ; block 0~7
|
|
||||||
movdqa xmm1, [esp+16] ; block 8~15
|
|
||||||
movdqa xmm2, xmm0
|
|
||||||
paddw xmm0, xmm1
|
|
||||||
SUM_WORD_8x2_SSE2 xmm0, xmm3 ; better performance than that of phaddw sets
|
|
||||||
|
|
||||||
pmullw xmm1, xmm1
|
|
||||||
pmullw xmm2, xmm2
|
|
||||||
movdqa xmm3, xmm1
|
|
||||||
movdqa xmm4, xmm2
|
|
||||||
punpcklwd xmm1, xmm7
|
|
||||||
punpckhwd xmm3, xmm7
|
|
||||||
punpcklwd xmm2, xmm7
|
|
||||||
punpckhwd xmm4, xmm7
|
|
||||||
paddd xmm1, xmm2
|
|
||||||
paddd xmm3, xmm4
|
|
||||||
paddd xmm1, xmm3
|
|
||||||
pshufd xmm2, xmm1, 01Bh
|
|
||||||
paddd xmm1, xmm2
|
|
||||||
pshufd xmm2, xmm1, 0B1h
|
|
||||||
paddd xmm1, xmm2
|
|
||||||
|
|
||||||
movd ebx, xmm0
|
|
||||||
and ebx, 0ffffh ; effective low work truncated
|
|
||||||
mov ecx, ebx
|
|
||||||
imul ebx, ecx
|
|
||||||
sar ebx, $4
|
|
||||||
movd eax, xmm1
|
|
||||||
sub eax, ebx
|
|
||||||
|
|
||||||
%undef PUSH_SIZE
|
|
||||||
add esp, 32
|
|
||||||
add esp, ebp
|
|
||||||
pop ebp
|
|
||||||
pop edi
|
|
||||||
pop esi
|
|
||||||
pop edx
|
|
||||||
pop ebx
|
|
||||||
ret
|
|
||||||
%endif
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
WELS_EXTERN abs_difference_mbrow_sse2
|
WELS_EXTERN abs_difference_mbrow_sse2
|
||||||
;*************************************************************************************************************
|
;*************************************************************************************************************
|
||||||
@@ -1587,3 +1411,4 @@ sqdiff_bgd_width_loop:
|
|||||||
%undef pushsize
|
%undef pushsize
|
||||||
%undef localsize
|
%undef localsize
|
||||||
ret
|
ret
|
||||||
|
%endif
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user