Merge pull request #1385 from ruil2/function
enable ARM assembly for SampleSad16x16
This commit is contained in:
commit
8945348c87
@ -5,6 +5,7 @@ GTEST_CPP_SRCS=\
|
||||
GTEST_OBJS += $(GTEST_CPP_SRCS:.cc=.$(OBJ))
|
||||
|
||||
OBJS += $(GTEST_OBJS)
|
||||
|
||||
$(GTEST_SRCDIR)/%.$(OBJ): $(GTEST_SRCDIR)/%.cc
|
||||
$(QUIET_CXX)$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(GTEST_CFLAGS) $(GTEST_INCLUDES) -c $(CXX_O) $<
|
||||
|
||||
|
@ -25,6 +25,9 @@
|
||||
F5AC94FF193EB7D800F58154 /* deblocking_aarch64_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = F5AC94FE193EB7D800F58154 /* deblocking_aarch64_neon.S */; };
|
||||
F5B8D82D190757290037849A /* mc_aarch64_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = F5B8D82C190757290037849A /* mc_aarch64_neon.S */; };
|
||||
F5BB0BB8196BB5960072D50D /* copy_mb_aarch64_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = F5BB0BB7196BB5960072D50D /* copy_mb_aarch64_neon.S */; };
|
||||
F791965419D3B89D00F60C6B /* intra_pred_common_aarch64_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = F791965319D3B89D00F60C6B /* intra_pred_common_aarch64_neon.S */; };
|
||||
F791965619D3B8A600F60C6B /* intra_pred_common_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = F791965519D3B8A600F60C6B /* intra_pred_common_neon.S */; };
|
||||
F791965919D3BE2200F60C6B /* intra_pred_common.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F791965819D3BE2200F60C6B /* intra_pred_common.cpp */; };
|
||||
FAABAA1818E9354A00D4186F /* sad_common.cpp in Sources */ = {isa = PBXBuildFile; fileRef = FAABAA1718E9354A00D4186F /* sad_common.cpp */; };
|
||||
/* End PBXBuildFile section */
|
||||
|
||||
@ -74,6 +77,10 @@
|
||||
F5AC94FE193EB7D800F58154 /* deblocking_aarch64_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = deblocking_aarch64_neon.S; path = arm64/deblocking_aarch64_neon.S; sourceTree = "<group>"; };
|
||||
F5B8D82C190757290037849A /* mc_aarch64_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = mc_aarch64_neon.S; path = arm64/mc_aarch64_neon.S; sourceTree = "<group>"; };
|
||||
F5BB0BB7196BB5960072D50D /* copy_mb_aarch64_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = copy_mb_aarch64_neon.S; path = arm64/copy_mb_aarch64_neon.S; sourceTree = "<group>"; };
|
||||
F791965319D3B89D00F60C6B /* intra_pred_common_aarch64_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = intra_pred_common_aarch64_neon.S; path = arm64/intra_pred_common_aarch64_neon.S; sourceTree = "<group>"; };
|
||||
F791965519D3B8A600F60C6B /* intra_pred_common_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = intra_pred_common_neon.S; sourceTree = "<group>"; };
|
||||
F791965719D3BA9300F60C6B /* intra_pred_common.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = intra_pred_common.h; sourceTree = "<group>"; };
|
||||
F791965819D3BE2200F60C6B /* intra_pred_common.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = intra_pred_common.cpp; sourceTree = "<group>"; };
|
||||
FAABAA1618E9353F00D4186F /* sad_common.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sad_common.h; sourceTree = "<group>"; };
|
||||
FAABAA1718E9354A00D4186F /* sad_common.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sad_common.cpp; sourceTree = "<group>"; };
|
||||
/* End PBXFileReference section */
|
||||
@ -93,6 +100,7 @@
|
||||
4C3406B118D96EA600DFA14A /* arm */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
F791965519D3B8A600F60C6B /* intra_pred_common_neon.S */,
|
||||
4CC61F0818FF6B4B00E56EAB /* copy_mb_neon.S */,
|
||||
4C3406B218D96EA600DFA14A /* arm_arch_common_macro.S */,
|
||||
4C3406B318D96EA600DFA14A /* deblocking_neon.S */,
|
||||
@ -105,6 +113,7 @@
|
||||
4C3406B618D96EA600DFA14A /* inc */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
F791965719D3BA9300F60C6B /* intra_pred_common.h */,
|
||||
F0B204F718FD23B6005DA23F /* copy_mb.h */,
|
||||
FAABAA1618E9353F00D4186F /* sad_common.h */,
|
||||
4C3406B718D96EA600DFA14A /* cpu.h */,
|
||||
@ -126,6 +135,7 @@
|
||||
4C3406C318D96EA600DFA14A /* src */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
F791965819D3BE2200F60C6B /* intra_pred_common.cpp */,
|
||||
5BA8F2BF19603F5F00011CE4 /* common_tables.cpp */,
|
||||
F0B204F818FD23BF005DA23F /* copy_mb.cpp */,
|
||||
FAABAA1718E9354A00D4186F /* sad_common.cpp */,
|
||||
@ -179,6 +189,7 @@
|
||||
F556A81D1906669F00E156A8 /* arm64 */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
F791965319D3B89D00F60C6B /* intra_pred_common_aarch64_neon.S */,
|
||||
F5BB0BB7196BB5960072D50D /* copy_mb_aarch64_neon.S */,
|
||||
F5AC94FE193EB7D800F58154 /* deblocking_aarch64_neon.S */,
|
||||
F5B8D82C190757290037849A /* mc_aarch64_neon.S */,
|
||||
@ -244,12 +255,15 @@
|
||||
F556A8241906673900E156A8 /* arm_arch64_common_macro.S in Sources */,
|
||||
F5AC94FF193EB7D800F58154 /* deblocking_aarch64_neon.S in Sources */,
|
||||
4C3406CE18D96EA600DFA14A /* crt_util_safe_x.cpp in Sources */,
|
||||
F791965919D3BE2200F60C6B /* intra_pred_common.cpp in Sources */,
|
||||
4C3406CF18D96EA600DFA14A /* deblocking_common.cpp in Sources */,
|
||||
5BA8F2C019603F5F00011CE4 /* common_tables.cpp in Sources */,
|
||||
F791965419D3B89D00F60C6B /* intra_pred_common_aarch64_neon.S in Sources */,
|
||||
4C3406D118D96EA600DFA14A /* WelsThreadLib.cpp in Sources */,
|
||||
4C3406CC18D96EA600DFA14A /* mc_neon.S in Sources */,
|
||||
F5BB0BB8196BB5960072D50D /* copy_mb_aarch64_neon.S in Sources */,
|
||||
4C3406CB18D96EA600DFA14A /* expand_picture_neon.S in Sources */,
|
||||
F791965619D3B8A600F60C6B /* intra_pred_common_neon.S in Sources */,
|
||||
4CC61F0918FF6B4B00E56EAB /* copy_mb_neon.S in Sources */,
|
||||
53C1C9BC193F0FB000404D8F /* expand_pic.cpp in Sources */,
|
||||
4C3406CD18D96EA600DFA14A /* cpu.cpp in Sources */,
|
||||
|
@ -17,7 +17,6 @@
|
||||
549947E2196A3FB400BA3D87 /* pixel_sad_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = 549947AE196A3FB400BA3D87 /* pixel_sad_neon.S */; };
|
||||
549947E3196A3FB400BA3D87 /* vaa_calc_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = 549947AF196A3FB400BA3D87 /* vaa_calc_neon.S */; };
|
||||
549947E4196A3FB400BA3D87 /* BackgroundDetection.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 549947B1196A3FB400BA3D87 /* BackgroundDetection.cpp */; };
|
||||
549947E5196A3FB400BA3D87 /* common.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 549947B4196A3FB400BA3D87 /* common.cpp */; };
|
||||
549947E6196A3FB400BA3D87 /* memory.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 549947B6196A3FB400BA3D87 /* memory.cpp */; };
|
||||
549947E7196A3FB400BA3D87 /* WelsFrameWork.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 549947BB196A3FB400BA3D87 /* WelsFrameWork.cpp */; };
|
||||
549947E8196A3FB400BA3D87 /* WelsFrameWorkEx.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 549947BD196A3FB400BA3D87 /* WelsFrameWorkEx.cpp */; };
|
||||
@ -34,6 +33,7 @@
|
||||
549947F3196A3FB400BA3D87 /* vaacalcfuncs.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 549947D8196A3FB400BA3D87 /* vaacalcfuncs.cpp */; };
|
||||
549947F4196A3FB400BA3D87 /* vaacalculation.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 549947D9196A3FB400BA3D87 /* vaacalculation.cpp */; };
|
||||
6C749B78197E2A2000A111F9 /* adaptive_quantization_aarch64_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = 6C749B77197E2A2000A111F9 /* adaptive_quantization_aarch64_neon.S */; };
|
||||
F791965B19D3BF6B00F60C6B /* intra_pred_common.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F791965A19D3BF6B00F60C6B /* intra_pred_common.cpp */; };
|
||||
/* End PBXBuildFile section */
|
||||
|
||||
/* Begin PBXCopyFilesBuildPhase section */
|
||||
@ -64,7 +64,6 @@
|
||||
549947AF196A3FB400BA3D87 /* vaa_calc_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = vaa_calc_neon.S; sourceTree = "<group>"; };
|
||||
549947B1196A3FB400BA3D87 /* BackgroundDetection.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = BackgroundDetection.cpp; sourceTree = "<group>"; };
|
||||
549947B2196A3FB400BA3D87 /* BackgroundDetection.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = BackgroundDetection.h; sourceTree = "<group>"; };
|
||||
549947B4196A3FB400BA3D87 /* common.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = common.cpp; sourceTree = "<group>"; };
|
||||
549947B5196A3FB400BA3D87 /* common.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = common.h; sourceTree = "<group>"; };
|
||||
549947B6196A3FB400BA3D87 /* memory.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = memory.cpp; sourceTree = "<group>"; };
|
||||
549947B7196A3FB400BA3D87 /* memory.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = memory.h; sourceTree = "<group>"; };
|
||||
@ -97,6 +96,7 @@
|
||||
549947D9196A3FB400BA3D87 /* vaacalculation.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = vaacalculation.cpp; sourceTree = "<group>"; };
|
||||
549947DA196A3FB400BA3D87 /* vaacalculation.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = vaacalculation.h; sourceTree = "<group>"; };
|
||||
6C749B77197E2A2000A111F9 /* adaptive_quantization_aarch64_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = adaptive_quantization_aarch64_neon.S; path = arm64/adaptive_quantization_aarch64_neon.S; sourceTree = "<group>"; };
|
||||
F791965A19D3BF6B00F60C6B /* intra_pred_common.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = intra_pred_common.cpp; path = ../../../common/src/intra_pred_common.cpp; sourceTree = "<group>"; };
|
||||
/* End PBXFileReference section */
|
||||
|
||||
/* Begin PBXFrameworksBuildPhase section */
|
||||
@ -210,7 +210,7 @@
|
||||
549947B3196A3FB400BA3D87 /* common */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
549947B4196A3FB400BA3D87 /* common.cpp */,
|
||||
F791965A19D3BF6B00F60C6B /* intra_pred_common.cpp */,
|
||||
549947B5196A3FB400BA3D87 /* common.h */,
|
||||
549947B6196A3FB400BA3D87 /* memory.cpp */,
|
||||
549947B7196A3FB400BA3D87 /* memory.h */,
|
||||
@ -351,7 +351,6 @@
|
||||
549947E9196A3FB400BA3D87 /* ComplexityAnalysis.cpp in Sources */,
|
||||
549947E3196A3FB400BA3D87 /* vaa_calc_neon.S in Sources */,
|
||||
549947EE196A3FB400BA3D87 /* imagerotate.cpp in Sources */,
|
||||
549947E5196A3FB400BA3D87 /* common.cpp in Sources */,
|
||||
549947EA196A3FB400BA3D87 /* denoise.cpp in Sources */,
|
||||
549947E7196A3FB400BA3D87 /* WelsFrameWork.cpp in Sources */,
|
||||
549947F1196A3FB400BA3D87 /* ScrollDetection.cpp in Sources */,
|
||||
@ -367,6 +366,7 @@
|
||||
4CC6094F197E009D00BE8B8B /* down_sample_aarch64_neon.S in Sources */,
|
||||
4CC6095A1980F34F00BE8B8B /* vaa_calc_aarch64_neon.S in Sources */,
|
||||
549947F2196A3FB400BA3D87 /* ScrollDetectionFuncs.cpp in Sources */,
|
||||
F791965B19D3BF6B00F60C6B /* intra_pred_common.cpp in Sources */,
|
||||
549947EF196A3FB400BA3D87 /* imagerotatefuncs.cpp in Sources */,
|
||||
549947DF196A3FB400BA3D87 /* AdaptiveQuantization.cpp in Sources */,
|
||||
549947EC196A3FB400BA3D87 /* downsample.cpp in Sources */,
|
||||
|
@ -381,6 +381,10 @@
|
||||
RelativePath="..\..\..\encoder\core\src\get_intra_predictor.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\common\src\intra_pred_common.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\encoder\core\src\mc.cpp"
|
||||
>
|
||||
@ -959,6 +963,46 @@
|
||||
/>
|
||||
</FileConfiguration>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\common\x86\intra_pred_com.asm"
|
||||
>
|
||||
<FileConfiguration
|
||||
Name="Debug|Win32"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/x86/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||
Outputs="$(IntDir)\$(InputName).obj"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Debug|x64"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/x86/ -f win64 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||
Outputs="$(IntDir)\$(InputName).obj"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Release|Win32"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/x86/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||
Outputs="$(IntDir)\$(InputName).obj"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Release|x64"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/x86/ -f win64 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||
Outputs="$(IntDir)\$(InputName).obj"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\encoder\core\x86\matrix_transpose.asm"
|
||||
>
|
||||
|
83
codec/common/arm/intra_pred_common_neon.S
Normal file
83
codec/common/arm/intra_pred_common_neon.S
Normal file
@ -0,0 +1,83 @@
|
||||
/*!
|
||||
* \copy
|
||||
* Copyright (c) 2013, Cisco Systems
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#ifdef HAVE_NEON
|
||||
.text
|
||||
#include "arm_arch_common_macro.S"
|
||||
|
||||
WELS_ASM_FUNC_BEGIN WelsI16x16LumaPredV_neon
|
||||
//Get the top line data to 'q0'
|
||||
sub r3, r1, r2
|
||||
vldm r3, {d0, d1}
|
||||
|
||||
//mov r2, #16
|
||||
mov r3, #4
|
||||
//Set the top line to the each line of MB(16*16)
|
||||
loop_0_get_i16x16_luma_pred_v:
|
||||
vst1.8 {d0,d1}, [r0]!
|
||||
vst1.8 {d0,d1}, [r0]!
|
||||
vst1.8 {d0,d1}, [r0]!
|
||||
vst1.8 {d0,d1}, [r0]!
|
||||
subs r3, #1
|
||||
bne loop_0_get_i16x16_luma_pred_v
|
||||
WELS_ASM_FUNC_END
|
||||
|
||||
|
||||
WELS_ASM_FUNC_BEGIN WelsI16x16LumaPredH_neon
|
||||
//stmdb sp!, {r4, lr}
|
||||
sub r1, r1, #1
|
||||
mov r3, #4
|
||||
loop_0_get_i16x16_luma_pred_h:
|
||||
//Get one byte data from left side
|
||||
vld1.8 {d0[],d1[]}, [r1], r2
|
||||
vld1.8 {d2[],d3[]}, [r1], r2
|
||||
vld1.8 {d4[],d5[]}, [r1], r2
|
||||
vld1.8 {d6[],d7[]}, [r1], r2
|
||||
|
||||
//Set the line of MB using the left side byte data
|
||||
vst1.8 {d0,d1}, [r0]!
|
||||
//add r0, #16
|
||||
vst1.8 {d2,d3}, [r0]!
|
||||
//add r0, #16
|
||||
vst1.8 {d4,d5}, [r0]!
|
||||
//add r0, #16
|
||||
vst1.8 {d6,d7}, [r0]!
|
||||
//add r0, #16
|
||||
|
||||
subs r3, #1
|
||||
bne loop_0_get_i16x16_luma_pred_h
|
||||
|
||||
WELS_ASM_FUNC_END
|
||||
|
||||
|
||||
#endif
|
55
codec/common/arm64/intra_pred_common_aarch64_neon.S
Normal file
55
codec/common/arm64/intra_pred_common_aarch64_neon.S
Normal file
@ -0,0 +1,55 @@
|
||||
/*!
|
||||
* \copy
|
||||
* Copyright (c) 2013, Cisco Systems
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifdef HAVE_NEON_AARCH64
|
||||
.text
|
||||
#include "arm_arch64_common_macro.S"
|
||||
|
||||
//for Luma 16x16
|
||||
WELS_ASM_AARCH64_FUNC_BEGIN WelsI16x16LumaPredV_AArch64_neon
|
||||
sub x3, x1, x2
|
||||
ld1 {v0.16b}, [x3]
|
||||
.rept 16
|
||||
st1 {v0.16b}, [x0], 16
|
||||
.endr
|
||||
WELS_ASM_AARCH64_FUNC_END
|
||||
|
||||
WELS_ASM_AARCH64_FUNC_BEGIN WelsI16x16LumaPredH_AArch64_neon
|
||||
sub x3, x1, #1
|
||||
.rept 16
|
||||
ld1r {v0.16b}, [x3], x2
|
||||
st1 {v0.16b}, [x0], 16
|
||||
.endr
|
||||
WELS_ASM_AARCH64_FUNC_END
|
||||
|
||||
#endif
|
||||
|
76
codec/common/inc/intra_pred_common.h
Normal file
76
codec/common/inc/intra_pred_common.h
Normal file
@ -0,0 +1,76 @@
|
||||
/*!
|
||||
* \copy
|
||||
* Copyright (c) 2009-2013, Cisco Systems
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*
|
||||
* \file intra_pred_common.h
|
||||
*
|
||||
* \brief interfaces for intra predictor about 16x16.
|
||||
*
|
||||
* \date 4/2/2014 Created
|
||||
*
|
||||
*************************************************************************************
|
||||
*/
|
||||
|
||||
#ifndef INTRA_PRED_COMMON_H
|
||||
#define INTRA_PRED_COMMON_H
|
||||
|
||||
#include "typedefs.h"
|
||||
|
||||
|
||||
void WelsI16x16LumaPredV_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
|
||||
void WelsI16x16LumaPredH_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
|
||||
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif//__cplusplus
|
||||
|
||||
#if defined(X86_ASM)
|
||||
//for intra-prediction ASM functions
|
||||
void WelsI16x16LumaPredV_sse2 (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
|
||||
void WelsI16x16LumaPredH_sse2 (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
|
||||
#endif//X86_ASM
|
||||
|
||||
#if defined(HAVE_NEON)
|
||||
void WelsI16x16LumaPredV_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
|
||||
void WelsI16x16LumaPredH_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
|
||||
#endif//HAVE_NEON
|
||||
|
||||
#if defined(HAVE_NEON_AARCH64)
|
||||
void WelsI16x16LumaPredV_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
|
||||
void WelsI16x16LumaPredH_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
|
||||
#endif//HAVE_NEON_AARCH64
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif//__cplusplus
|
||||
#endif//
|
||||
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*!
|
||||
* \copy
|
||||
* Copyright (c) 2013, Cisco Systems
|
||||
* Copyright (c) 2009-2013, Cisco Systems
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
@ -28,12 +28,21 @@
|
||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*
|
||||
* \file get_intra_predictor.c
|
||||
*
|
||||
* \brief implementation for get intra predictor about 16x16, 4x4, chroma.
|
||||
*
|
||||
* \date 4/2/2009 Created
|
||||
* 9/14/2009 C level based optimization with high performance gained.
|
||||
* [const, using ST32/ST64 to replace memset, memcpy and memmove etc.]
|
||||
*
|
||||
*************************************************************************************
|
||||
*/
|
||||
|
||||
#include "common.h"
|
||||
#include "ls_defines.h"
|
||||
#include "cpu_core.h"
|
||||
#include "intra_pred_common.h"
|
||||
|
||||
WELSVP_NAMESPACE_BEGIN
|
||||
|
||||
void WelsI16x16LumaPredV_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) {
|
||||
uint8_t i = 15;
|
||||
@ -66,4 +75,3 @@ void WelsI16x16LumaPredH_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStrid
|
||||
} while (i-- > 0);
|
||||
}
|
||||
|
||||
WELSVP_NAMESPACE_END
|
@ -6,6 +6,7 @@ COMMON_CPP_SRCS=\
|
||||
$(COMMON_SRCDIR)/src/crt_util_safe_x.cpp\
|
||||
$(COMMON_SRCDIR)/src/deblocking_common.cpp\
|
||||
$(COMMON_SRCDIR)/src/expand_pic.cpp\
|
||||
$(COMMON_SRCDIR)/src/intra_pred_common.cpp\
|
||||
$(COMMON_SRCDIR)/src/sad_common.cpp\
|
||||
$(COMMON_SRCDIR)/src/utils.cpp\
|
||||
$(COMMON_SRCDIR)/src/welsCodecTrace.cpp\
|
||||
@ -17,6 +18,7 @@ COMMON_ASM_SRCS=\
|
||||
$(COMMON_SRCDIR)/x86/cpuid.asm\
|
||||
$(COMMON_SRCDIR)/x86/deblock.asm\
|
||||
$(COMMON_SRCDIR)/x86/expand_picture.asm\
|
||||
$(COMMON_SRCDIR)/x86/intra_pred_com.asm\
|
||||
$(COMMON_SRCDIR)/x86/mb_copy.asm\
|
||||
$(COMMON_SRCDIR)/x86/mc_chroma.asm\
|
||||
$(COMMON_SRCDIR)/x86/mc_luma.asm\
|
||||
@ -33,6 +35,7 @@ COMMON_ASM_ARM_SRCS=\
|
||||
$(COMMON_SRCDIR)/arm/copy_mb_neon.S\
|
||||
$(COMMON_SRCDIR)/arm/deblocking_neon.S\
|
||||
$(COMMON_SRCDIR)/arm/expand_picture_neon.S\
|
||||
$(COMMON_SRCDIR)/arm/intra_pred_common_neon.S\
|
||||
$(COMMON_SRCDIR)/arm/mc_neon.S\
|
||||
|
||||
COMMON_OBJSARM += $(COMMON_ASM_ARM_SRCS:.S=.$(OBJ))
|
||||
@ -45,6 +48,7 @@ COMMON_ASM_ARM64_SRCS=\
|
||||
$(COMMON_SRCDIR)/arm64/copy_mb_aarch64_neon.S\
|
||||
$(COMMON_SRCDIR)/arm64/deblocking_aarch64_neon.S\
|
||||
$(COMMON_SRCDIR)/arm64/expand_picture_aarch64_neon.S\
|
||||
$(COMMON_SRCDIR)/arm64/intra_pred_common_aarch64_neon.S\
|
||||
$(COMMON_SRCDIR)/arm64/mc_aarch64_neon.S\
|
||||
|
||||
COMMON_OBJSARM64 += $(COMMON_ASM_ARM64_SRCS:.S=.$(OBJ))
|
||||
|
117
codec/common/x86/intra_pred_com.asm
Normal file
117
codec/common/x86/intra_pred_com.asm
Normal file
@ -0,0 +1,117 @@
|
||||
;*!
|
||||
;* \copy
|
||||
;* Copyright (c) 2009-2013, Cisco Systems
|
||||
;* All rights reserved.
|
||||
;*
|
||||
;* Redistribution and use in source and binary forms, with or without
|
||||
;* modification, are permitted provided that the following conditions
|
||||
;* are met:
|
||||
;*
|
||||
;* * Redistributions of source code must retain the above copyright
|
||||
;* notice, this list of conditions and the following disclaimer.
|
||||
;*
|
||||
;* * Redistributions in binary form must reproduce the above copyright
|
||||
;* notice, this list of conditions and the following disclaimer in
|
||||
;* the documentation and/or other materials provided with the
|
||||
;* distribution.
|
||||
;*
|
||||
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
||||
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
;* POSSIBILITY OF SUCH DAMAGE.
|
||||
;*
|
||||
;*
|
||||
;* intra_pred_common.asm
|
||||
;*
|
||||
;* Abstract
|
||||
;* sse2 function for intra predict operations
|
||||
;*
|
||||
;* History
|
||||
;* 18/09/2009 Created
|
||||
;*
|
||||
;*
|
||||
;*************************************************************************/
|
||||
%include "asm_inc.asm"
|
||||
|
||||
;***********************************************************************
|
||||
; Local Data (Read Only)
|
||||
;***********************************************************************
|
||||
|
||||
SECTION .rodata align=16
|
||||
|
||||
;***********************************************************************
|
||||
; void WelsI16x16LumaPredH_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride);
|
||||
;***********************************************************************
|
||||
|
||||
%macro SSE2_PRED_H_16X16_ONE_LINE 0
|
||||
add r0, 16
|
||||
add r1, r2
|
||||
movzx r3, byte [r1]
|
||||
SSE2_Copy16Times xmm0, r3d
|
||||
movdqa [r0], xmm0
|
||||
%endmacro
|
||||
|
||||
WELS_EXTERN WelsI16x16LumaPredH_sse2
|
||||
push r3
|
||||
%assign push_num 1
|
||||
LOAD_3_PARA
|
||||
SIGN_EXTENSION r2, r2d
|
||||
dec r1
|
||||
movzx r3, byte [r1]
|
||||
SSE2_Copy16Times xmm0, r3d
|
||||
movdqa [r0], xmm0
|
||||
SSE2_PRED_H_16X16_ONE_LINE
|
||||
SSE2_PRED_H_16X16_ONE_LINE
|
||||
SSE2_PRED_H_16X16_ONE_LINE
|
||||
SSE2_PRED_H_16X16_ONE_LINE
|
||||
SSE2_PRED_H_16X16_ONE_LINE
|
||||
SSE2_PRED_H_16X16_ONE_LINE
|
||||
SSE2_PRED_H_16X16_ONE_LINE
|
||||
SSE2_PRED_H_16X16_ONE_LINE
|
||||
SSE2_PRED_H_16X16_ONE_LINE
|
||||
SSE2_PRED_H_16X16_ONE_LINE
|
||||
SSE2_PRED_H_16X16_ONE_LINE
|
||||
SSE2_PRED_H_16X16_ONE_LINE
|
||||
SSE2_PRED_H_16X16_ONE_LINE
|
||||
SSE2_PRED_H_16X16_ONE_LINE
|
||||
SSE2_PRED_H_16X16_ONE_LINE
|
||||
pop r3
|
||||
ret
|
||||
|
||||
;***********************************************************************
|
||||
; void WelsI16x16LumaPredV_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride);
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsI16x16LumaPredV_sse2
|
||||
%assign push_num 0
|
||||
LOAD_3_PARA
|
||||
SIGN_EXTENSION r2, r2d
|
||||
sub r1, r2
|
||||
movdqa xmm0, [r1]
|
||||
|
||||
movdqa [r0], xmm0
|
||||
movdqa [r0+10h], xmm0
|
||||
movdqa [r0+20h], xmm0
|
||||
movdqa [r0+30h], xmm0
|
||||
movdqa [r0+40h], xmm0
|
||||
movdqa [r0+50h], xmm0
|
||||
movdqa [r0+60h], xmm0
|
||||
movdqa [r0+70h], xmm0
|
||||
movdqa [r0+80h], xmm0
|
||||
movdqa [r0+90h], xmm0
|
||||
movdqa [r0+160], xmm0
|
||||
movdqa [r0+176], xmm0
|
||||
movdqa [r0+192], xmm0
|
||||
movdqa [r0+208], xmm0
|
||||
movdqa [r0+224], xmm0
|
||||
movdqa [r0+240], xmm0
|
||||
|
||||
ret
|
||||
|
@ -62,51 +62,6 @@
|
||||
#endif
|
||||
|
||||
|
||||
WELS_ASM_FUNC_BEGIN WelsI16x16LumaPredV_neon
|
||||
//Get the top line data to 'q0'
|
||||
sub r3, r1, r2
|
||||
vldm r3, {d0, d1}
|
||||
|
||||
//mov r2, #16
|
||||
mov r3, #4
|
||||
//Set the top line to the each line of MB(16*16)
|
||||
loop_0_get_i16x16_luma_pred_v:
|
||||
vst1.8 {d0,d1}, [r0]!
|
||||
vst1.8 {d0,d1}, [r0]!
|
||||
vst1.8 {d0,d1}, [r0]!
|
||||
vst1.8 {d0,d1}, [r0]!
|
||||
subs r3, #1
|
||||
bne loop_0_get_i16x16_luma_pred_v
|
||||
WELS_ASM_FUNC_END
|
||||
|
||||
|
||||
WELS_ASM_FUNC_BEGIN WelsI16x16LumaPredH_neon
|
||||
//stmdb sp!, {r4, lr}
|
||||
sub r1, r1, #1
|
||||
mov r3, #4
|
||||
loop_0_get_i16x16_luma_pred_h:
|
||||
//Get one byte data from left side
|
||||
vld1.8 {d0[],d1[]}, [r1], r2
|
||||
vld1.8 {d2[],d3[]}, [r1], r2
|
||||
vld1.8 {d4[],d5[]}, [r1], r2
|
||||
vld1.8 {d6[],d7[]}, [r1], r2
|
||||
|
||||
//Set the line of MB using the left side byte data
|
||||
vst1.8 {d0,d1}, [r0]!
|
||||
//add r0, #16
|
||||
vst1.8 {d2,d3}, [r0]!
|
||||
//add r0, #16
|
||||
vst1.8 {d4,d5}, [r0]!
|
||||
//add r0, #16
|
||||
vst1.8 {d6,d7}, [r0]!
|
||||
//add r0, #16
|
||||
|
||||
subs r3, #1
|
||||
bne loop_0_get_i16x16_luma_pred_h
|
||||
|
||||
WELS_ASM_FUNC_END
|
||||
|
||||
|
||||
WELS_ASM_FUNC_BEGIN WelsI16x16LumaPredDc_neon
|
||||
//stmdb sp!, { r2-r5, lr}
|
||||
//Get the left vertical line data
|
||||
|
@ -349,23 +349,6 @@ WELS_ASM_AARCH64_FUNC_BEGIN WelsIChromaPredPlane_AArch64_neon
|
||||
.endr
|
||||
WELS_ASM_AARCH64_FUNC_END
|
||||
|
||||
//for Luma 16x16
|
||||
WELS_ASM_AARCH64_FUNC_BEGIN WelsI16x16LumaPredV_AArch64_neon
|
||||
sub x3, x1, x2
|
||||
ld1 {v0.16b}, [x3]
|
||||
.rept 16
|
||||
st1 {v0.16b}, [x0], 16
|
||||
.endr
|
||||
WELS_ASM_AARCH64_FUNC_END
|
||||
|
||||
WELS_ASM_AARCH64_FUNC_BEGIN WelsI16x16LumaPredH_AArch64_neon
|
||||
sub x3, x1, #1
|
||||
.rept 16
|
||||
ld1r {v0.16b}, [x3], x2
|
||||
st1 {v0.16b}, [x0], 16
|
||||
.endr
|
||||
WELS_ASM_AARCH64_FUNC_END
|
||||
|
||||
WELS_ASM_AARCH64_FUNC_BEGIN WelsI16x16LumaPredDc_AArch64_neon
|
||||
sub x3, x1, x2
|
||||
sub x4, x1, #1
|
||||
|
@ -74,8 +74,6 @@ void WelsIChromaPredDcNA_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStrid
|
||||
void WelsI16x16ChromaPredVer (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
|
||||
void WelsI16x16ChromaPredHor (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
|
||||
|
||||
void WelsI16x16LumaPredV_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
|
||||
void WelsI16x16LumaPredH_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
|
||||
void WelsI16x16LumaPredPlane_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
|
||||
void WelsI16x16LumaPredDc_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
|
||||
void WelsI16x16LumaPredDcLeft_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
|
||||
@ -94,8 +92,6 @@ void WelsFillingPred8x2to16_sse2 (uint8_t* pPred, uint8_t* pValue);
|
||||
void WelsFillingPred1to16_sse2 (uint8_t* pPred, const uint8_t kuiValue);
|
||||
|
||||
//for intra-prediction ASM functions
|
||||
void WelsI16x16LumaPredV_sse2 (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
|
||||
void WelsI16x16LumaPredH_sse2 (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
|
||||
void WelsI16x16LumaPredDc_sse2 (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
|
||||
void WelsI16x16LumaPredPlane_sse2 (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
|
||||
|
||||
@ -116,8 +112,6 @@ void WelsI4x4LumaPredHU_mmx (uint8_t* pPred, uint8_t* pRef, const int32_t kiStri
|
||||
#endif//X86_ASM
|
||||
|
||||
#if defined(HAVE_NEON)
|
||||
void WelsI16x16LumaPredV_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
|
||||
void WelsI16x16LumaPredH_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
|
||||
void WelsI16x16LumaPredDc_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
|
||||
void WelsI16x16LumaPredPlane_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
|
||||
|
||||
@ -137,8 +131,6 @@ void WelsIChromaPredPlane_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiS
|
||||
#endif//HAVE_NEON
|
||||
|
||||
#if defined(HAVE_NEON_AARCH64)
|
||||
void WelsI16x16LumaPredV_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
|
||||
void WelsI16x16LumaPredH_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
|
||||
void WelsI16x16LumaPredDc_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
|
||||
void WelsI16x16LumaPredPlane_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
|
||||
void WelsI16x16LumaPredDcTop_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
|
||||
|
@ -41,6 +41,7 @@
|
||||
*/
|
||||
#include "ls_defines.h"
|
||||
#include "cpu_core.h"
|
||||
#include "intra_pred_common.h"
|
||||
#include "get_intra_predictor.h"
|
||||
|
||||
namespace WelsEnc {
|
||||
@ -538,37 +539,6 @@ void WelsIChromaPredDcNA_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStrid
|
||||
}
|
||||
|
||||
|
||||
void WelsI16x16LumaPredV_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) {
|
||||
uint8_t i = 15;
|
||||
const int8_t* kpSrc = (int8_t*)&pRef[-kiStride];
|
||||
const uint64_t kuiT1 = LD64 (kpSrc);
|
||||
const uint64_t kuiT2 = LD64 (kpSrc + 8);
|
||||
uint8_t* pDst = pPred;
|
||||
|
||||
do {
|
||||
ST64 (pDst , kuiT1);
|
||||
ST64 (pDst + 8, kuiT2);
|
||||
pDst += 16;
|
||||
} while (i-- > 0);
|
||||
}
|
||||
|
||||
void WelsI16x16LumaPredH_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) {
|
||||
int32_t iStridex15 = (kiStride << 4) - kiStride;
|
||||
int32_t iPredStride = 16;
|
||||
int32_t iPredStridex15 = 240; //(iPredStride<<4)-iPredStride;
|
||||
uint8_t i = 15;
|
||||
|
||||
do {
|
||||
const uint8_t kuiSrc8 = pRef[iStridex15 - 1];
|
||||
const uint64_t kuiV64 = (uint64_t) (0x0101010101010101ULL * kuiSrc8);
|
||||
ST64 (&pPred[iPredStridex15], kuiV64);
|
||||
ST64 (&pPred[iPredStridex15 + 8], kuiV64);
|
||||
|
||||
iStridex15 -= kiStride;
|
||||
iPredStridex15 -= iPredStride;
|
||||
} while (i-- > 0);
|
||||
}
|
||||
|
||||
void WelsI16x16LumaPredPlane_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) {
|
||||
int32_t iLTshift = 0, iTopshift = 0, iLeftshift = 0, iTopSum = 0, iLeftSum = 0;
|
||||
int32_t i, j;
|
||||
|
@ -40,7 +40,7 @@
|
||||
|
||||
#include "sample.h"
|
||||
#include "sad_common.h"
|
||||
|
||||
#include "intra_pred_common.h"
|
||||
#include "mc.h"
|
||||
#include "cpu_core.h"
|
||||
|
||||
@ -250,8 +250,8 @@ int32_t WelsSampleSadIntra8x8Combined3_c (uint8_t* pDecCb, int32_t iDecStride, u
|
||||
}
|
||||
|
||||
extern void WelsI16x16LumaPredDc_c (uint8_t* pPred, uint8_t* pRef, const int32_t iStride);
|
||||
extern void WelsI16x16LumaPredH_c (uint8_t* pPred, uint8_t* pRef, const int32_t iStride);
|
||||
extern void WelsI16x16LumaPredV_c (uint8_t* pPred, uint8_t* pRef, const int32_t iStride);
|
||||
//extern void WelsI16x16LumaPredH_c (uint8_t* pPred, uint8_t* pRef, const int32_t iStride);
|
||||
//extern void WelsI16x16LumaPredV_c (uint8_t* pPred, uint8_t* pRef, const int32_t iStride);
|
||||
|
||||
int32_t WelsSampleSatdIntra16x16Combined3_c (uint8_t* pDec, int32_t iDecStride, uint8_t* pEnc, int32_t iEncStride,
|
||||
int32_t* pBestMode, int32_t iLambda, uint8_t* pDst) {
|
||||
|
@ -306,74 +306,6 @@ get_i16x16_luma_pred_plane_sse2_1:
|
||||
pop r3
|
||||
ret
|
||||
|
||||
;***********************************************************************
|
||||
; void WelsI16x16LumaPredH_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride);
|
||||
;***********************************************************************
|
||||
|
||||
%macro SSE2_PRED_H_16X16_ONE_LINE 0
|
||||
add r0, 16
|
||||
add r1, r2
|
||||
movzx r3, byte [r1]
|
||||
SSE2_Copy16Times xmm0, r3d
|
||||
movdqa [r0], xmm0
|
||||
%endmacro
|
||||
|
||||
WELS_EXTERN WelsI16x16LumaPredH_sse2
|
||||
push r3
|
||||
%assign push_num 1
|
||||
LOAD_3_PARA
|
||||
SIGN_EXTENSION r2, r2d
|
||||
dec r1
|
||||
movzx r3, byte [r1]
|
||||
SSE2_Copy16Times xmm0, r3d
|
||||
movdqa [r0], xmm0
|
||||
SSE2_PRED_H_16X16_ONE_LINE
|
||||
SSE2_PRED_H_16X16_ONE_LINE
|
||||
SSE2_PRED_H_16X16_ONE_LINE
|
||||
SSE2_PRED_H_16X16_ONE_LINE
|
||||
SSE2_PRED_H_16X16_ONE_LINE
|
||||
SSE2_PRED_H_16X16_ONE_LINE
|
||||
SSE2_PRED_H_16X16_ONE_LINE
|
||||
SSE2_PRED_H_16X16_ONE_LINE
|
||||
SSE2_PRED_H_16X16_ONE_LINE
|
||||
SSE2_PRED_H_16X16_ONE_LINE
|
||||
SSE2_PRED_H_16X16_ONE_LINE
|
||||
SSE2_PRED_H_16X16_ONE_LINE
|
||||
SSE2_PRED_H_16X16_ONE_LINE
|
||||
SSE2_PRED_H_16X16_ONE_LINE
|
||||
SSE2_PRED_H_16X16_ONE_LINE
|
||||
pop r3
|
||||
ret
|
||||
|
||||
;***********************************************************************
|
||||
; void WelsI16x16LumaPredV_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride);
|
||||
;***********************************************************************
|
||||
WELS_EXTERN WelsI16x16LumaPredV_sse2
|
||||
%assign push_num 0
|
||||
LOAD_3_PARA
|
||||
SIGN_EXTENSION r2, r2d
|
||||
sub r1, r2
|
||||
movdqa xmm0, [r1]
|
||||
|
||||
movdqa [r0], xmm0
|
||||
movdqa [r0+10h], xmm0
|
||||
movdqa [r0+20h], xmm0
|
||||
movdqa [r0+30h], xmm0
|
||||
movdqa [r0+40h], xmm0
|
||||
movdqa [r0+50h], xmm0
|
||||
movdqa [r0+60h], xmm0
|
||||
movdqa [r0+70h], xmm0
|
||||
movdqa [r0+80h], xmm0
|
||||
movdqa [r0+90h], xmm0
|
||||
movdqa [r0+160], xmm0
|
||||
movdqa [r0+176], xmm0
|
||||
movdqa [r0+192], xmm0
|
||||
movdqa [r0+208], xmm0
|
||||
movdqa [r0+224], xmm0
|
||||
movdqa [r0+240], xmm0
|
||||
|
||||
ret
|
||||
|
||||
;***********************************************************************
|
||||
; void WelsIChromaPredPlane_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride);
|
||||
;***********************************************************************
|
||||
|
@ -358,11 +358,11 @@
|
||||
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
|
||||
>
|
||||
<File
|
||||
RelativePath="..\..\src\common\common.cpp"
|
||||
RelativePath="..\..\..\common\src\cpu.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\common\src\cpu.cpp"
|
||||
RelativePath="..\..\..\common\src\intra_pred_common.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
@ -425,6 +425,10 @@
|
||||
RelativePath="..\..\..\common\inc\cpu.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\common\inc\intra_pred_common.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\src\common\memory.h"
|
||||
>
|
||||
@ -573,6 +577,46 @@
|
||||
/>
|
||||
</FileConfiguration>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\common\x86\intra_pred_com.asm"
|
||||
>
|
||||
<FileConfiguration
|
||||
Name="Debug|Win32"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/x86/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||
Outputs="$(IntDir)\$(InputName).obj"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Debug|x64"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/x86/ -f win64 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||
Outputs="$(IntDir)\$(InputName).obj"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Release|Win32"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/x86/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||
Outputs="$(IntDir)\$(InputName).obj"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Release|x64"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/x86/ -f win64 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
|
||||
Outputs="$(IntDir)\$(InputName).obj"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\common\x86\satd_sad.asm"
|
||||
>
|
||||
|
@ -38,14 +38,24 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef WELSVP_SCENECHANGEDETECTIONCOMMON_H
|
||||
#define WELSVP_SCENECHANGEDETECTIONCOMMON_H
|
||||
#ifndef WELSVP_COMMON_H
|
||||
#define WELSVP_COMMON_H
|
||||
|
||||
#include "util.h"
|
||||
#include "memory.h"
|
||||
#include "WelsFrameWork.h"
|
||||
#include "IWelsVP.h"
|
||||
#include "sad_common.h"
|
||||
#include "intra_pred_common.h"
|
||||
|
||||
|
||||
|
||||
typedef void (GetIntraPred) (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
|
||||
|
||||
typedef GetIntraPred* GetIntraPredPtr;
|
||||
|
||||
GetIntraPred WelsI16x16LumaPredV_c;
|
||||
GetIntraPred WelsI16x16LumaPredH_c;
|
||||
|
||||
WELSVP_NAMESPACE_BEGIN
|
||||
|
||||
@ -56,12 +66,6 @@ typedef SadFunc* SadFuncPtr;
|
||||
typedef int32_t (Sad16x16Func) (uint8_t* pSrcY, int32_t iSrcStrideY, uint8_t* pRefY, int32_t iRefStrideY);
|
||||
typedef Sad16x16Func* PSad16x16Func;
|
||||
|
||||
typedef void (GetIntraPred) (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
|
||||
|
||||
typedef GetIntraPred* GetIntraPredPtr;
|
||||
|
||||
GetIntraPred WelsI16x16LumaPredV_c;
|
||||
GetIntraPred WelsI16x16LumaPredH_c;
|
||||
|
||||
#ifdef HAVE_NEON
|
||||
WELSVP_EXTERN_C_BEGIN
|
||||
|
@ -33,10 +33,10 @@
|
||||
#include "ComplexityAnalysis.h"
|
||||
#include "cpu.h"
|
||||
#include "macros.h"
|
||||
#include "intra_pred_common.h"
|
||||
|
||||
WELSVP_NAMESPACE_BEGIN
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
CComplexityAnalysis::CComplexityAnalysis (int32_t iCpuFlag) {
|
||||
@ -280,8 +280,29 @@ CComplexityAnalysisScreen::CComplexityAnalysisScreen (int32_t iCpuFlag) {
|
||||
#ifdef X86_ASM
|
||||
if (iCpuFlag & WELS_CPU_SSE2) {
|
||||
m_pSadFunc = WelsSampleSad16x16_sse2;
|
||||
m_pIntraFunc[0] = WelsI16x16LumaPredV_sse2;
|
||||
m_pIntraFunc[1] = WelsI16x16LumaPredH_sse2;
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined (HAVE_NEON)
|
||||
if (iCpuFlag & WELS_CPU_NEON) {
|
||||
m_pSadFunc = WelsSampleSad16x16_neon;
|
||||
m_pIntraFunc[0] = WelsI16x16LumaPredV_neon;
|
||||
m_pIntraFunc[1] = WelsI16x16LumaPredH_neon;
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined (HAVE_NEON_AARCH64)
|
||||
if (iCpuFlag & WELS_CPU_NEON) {
|
||||
m_pSadFunc = WelsSampleSad16x16_AArch64_neon;
|
||||
m_pIntraFunc[0] = WelsI16x16LumaPredV_AArch64_neon;
|
||||
m_pIntraFunc[1] = WelsI16x16LumaPredH_AArch64_neon;
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
CComplexityAnalysisScreen::~CComplexityAnalysisScreen() {
|
||||
|
@ -2,7 +2,6 @@ PROCESSING_SRCDIR=codec/processing
|
||||
PROCESSING_CPP_SRCS=\
|
||||
$(PROCESSING_SRCDIR)/src/adaptivequantization/AdaptiveQuantization.cpp\
|
||||
$(PROCESSING_SRCDIR)/src/backgrounddetection/BackgroundDetection.cpp\
|
||||
$(PROCESSING_SRCDIR)/src/common/common.cpp\
|
||||
$(PROCESSING_SRCDIR)/src/common/memory.cpp\
|
||||
$(PROCESSING_SRCDIR)/src/common/WelsFrameWork.cpp\
|
||||
$(PROCESSING_SRCDIR)/src/common/WelsFrameWorkEx.cpp\
|
||||
|
Loading…
x
Reference in New Issue
Block a user