Merge pull request #1385 from ruil2/function

enable ARM assembly for SampleSad16x16
This commit is contained in:
huili2 2014-09-25 15:04:46 +08:00
commit 8945348c87
20 changed files with 495 additions and 193 deletions

View File

@ -5,6 +5,7 @@ GTEST_CPP_SRCS=\
GTEST_OBJS += $(GTEST_CPP_SRCS:.cc=.$(OBJ))
OBJS += $(GTEST_OBJS)
$(GTEST_SRCDIR)/%.$(OBJ): $(GTEST_SRCDIR)/%.cc
$(QUIET_CXX)$(CXX) $(CFLAGS) $(CXXFLAGS) $(INCLUDES) $(GTEST_CFLAGS) $(GTEST_INCLUDES) -c $(CXX_O) $<

View File

@ -25,6 +25,9 @@
F5AC94FF193EB7D800F58154 /* deblocking_aarch64_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = F5AC94FE193EB7D800F58154 /* deblocking_aarch64_neon.S */; };
F5B8D82D190757290037849A /* mc_aarch64_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = F5B8D82C190757290037849A /* mc_aarch64_neon.S */; };
F5BB0BB8196BB5960072D50D /* copy_mb_aarch64_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = F5BB0BB7196BB5960072D50D /* copy_mb_aarch64_neon.S */; };
F791965419D3B89D00F60C6B /* intra_pred_common_aarch64_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = F791965319D3B89D00F60C6B /* intra_pred_common_aarch64_neon.S */; };
F791965619D3B8A600F60C6B /* intra_pred_common_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = F791965519D3B8A600F60C6B /* intra_pred_common_neon.S */; };
F791965919D3BE2200F60C6B /* intra_pred_common.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F791965819D3BE2200F60C6B /* intra_pred_common.cpp */; };
FAABAA1818E9354A00D4186F /* sad_common.cpp in Sources */ = {isa = PBXBuildFile; fileRef = FAABAA1718E9354A00D4186F /* sad_common.cpp */; };
/* End PBXBuildFile section */
@ -74,6 +77,10 @@
F5AC94FE193EB7D800F58154 /* deblocking_aarch64_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = deblocking_aarch64_neon.S; path = arm64/deblocking_aarch64_neon.S; sourceTree = "<group>"; };
F5B8D82C190757290037849A /* mc_aarch64_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = mc_aarch64_neon.S; path = arm64/mc_aarch64_neon.S; sourceTree = "<group>"; };
F5BB0BB7196BB5960072D50D /* copy_mb_aarch64_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = copy_mb_aarch64_neon.S; path = arm64/copy_mb_aarch64_neon.S; sourceTree = "<group>"; };
F791965319D3B89D00F60C6B /* intra_pred_common_aarch64_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = intra_pred_common_aarch64_neon.S; path = arm64/intra_pred_common_aarch64_neon.S; sourceTree = "<group>"; };
F791965519D3B8A600F60C6B /* intra_pred_common_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = intra_pred_common_neon.S; sourceTree = "<group>"; };
F791965719D3BA9300F60C6B /* intra_pred_common.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = intra_pred_common.h; sourceTree = "<group>"; };
F791965819D3BE2200F60C6B /* intra_pred_common.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = intra_pred_common.cpp; sourceTree = "<group>"; };
FAABAA1618E9353F00D4186F /* sad_common.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sad_common.h; sourceTree = "<group>"; };
FAABAA1718E9354A00D4186F /* sad_common.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sad_common.cpp; sourceTree = "<group>"; };
/* End PBXFileReference section */
@ -93,6 +100,7 @@
4C3406B118D96EA600DFA14A /* arm */ = {
isa = PBXGroup;
children = (
F791965519D3B8A600F60C6B /* intra_pred_common_neon.S */,
4CC61F0818FF6B4B00E56EAB /* copy_mb_neon.S */,
4C3406B218D96EA600DFA14A /* arm_arch_common_macro.S */,
4C3406B318D96EA600DFA14A /* deblocking_neon.S */,
@ -105,6 +113,7 @@
4C3406B618D96EA600DFA14A /* inc */ = {
isa = PBXGroup;
children = (
F791965719D3BA9300F60C6B /* intra_pred_common.h */,
F0B204F718FD23B6005DA23F /* copy_mb.h */,
FAABAA1618E9353F00D4186F /* sad_common.h */,
4C3406B718D96EA600DFA14A /* cpu.h */,
@ -126,6 +135,7 @@
4C3406C318D96EA600DFA14A /* src */ = {
isa = PBXGroup;
children = (
F791965819D3BE2200F60C6B /* intra_pred_common.cpp */,
5BA8F2BF19603F5F00011CE4 /* common_tables.cpp */,
F0B204F818FD23BF005DA23F /* copy_mb.cpp */,
FAABAA1718E9354A00D4186F /* sad_common.cpp */,
@ -179,6 +189,7 @@
F556A81D1906669F00E156A8 /* arm64 */ = {
isa = PBXGroup;
children = (
F791965319D3B89D00F60C6B /* intra_pred_common_aarch64_neon.S */,
F5BB0BB7196BB5960072D50D /* copy_mb_aarch64_neon.S */,
F5AC94FE193EB7D800F58154 /* deblocking_aarch64_neon.S */,
F5B8D82C190757290037849A /* mc_aarch64_neon.S */,
@ -244,12 +255,15 @@
F556A8241906673900E156A8 /* arm_arch64_common_macro.S in Sources */,
F5AC94FF193EB7D800F58154 /* deblocking_aarch64_neon.S in Sources */,
4C3406CE18D96EA600DFA14A /* crt_util_safe_x.cpp in Sources */,
F791965919D3BE2200F60C6B /* intra_pred_common.cpp in Sources */,
4C3406CF18D96EA600DFA14A /* deblocking_common.cpp in Sources */,
5BA8F2C019603F5F00011CE4 /* common_tables.cpp in Sources */,
F791965419D3B89D00F60C6B /* intra_pred_common_aarch64_neon.S in Sources */,
4C3406D118D96EA600DFA14A /* WelsThreadLib.cpp in Sources */,
4C3406CC18D96EA600DFA14A /* mc_neon.S in Sources */,
F5BB0BB8196BB5960072D50D /* copy_mb_aarch64_neon.S in Sources */,
4C3406CB18D96EA600DFA14A /* expand_picture_neon.S in Sources */,
F791965619D3B8A600F60C6B /* intra_pred_common_neon.S in Sources */,
4CC61F0918FF6B4B00E56EAB /* copy_mb_neon.S in Sources */,
53C1C9BC193F0FB000404D8F /* expand_pic.cpp in Sources */,
4C3406CD18D96EA600DFA14A /* cpu.cpp in Sources */,

View File

@ -17,7 +17,6 @@
549947E2196A3FB400BA3D87 /* pixel_sad_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = 549947AE196A3FB400BA3D87 /* pixel_sad_neon.S */; };
549947E3196A3FB400BA3D87 /* vaa_calc_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = 549947AF196A3FB400BA3D87 /* vaa_calc_neon.S */; };
549947E4196A3FB400BA3D87 /* BackgroundDetection.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 549947B1196A3FB400BA3D87 /* BackgroundDetection.cpp */; };
549947E5196A3FB400BA3D87 /* common.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 549947B4196A3FB400BA3D87 /* common.cpp */; };
549947E6196A3FB400BA3D87 /* memory.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 549947B6196A3FB400BA3D87 /* memory.cpp */; };
549947E7196A3FB400BA3D87 /* WelsFrameWork.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 549947BB196A3FB400BA3D87 /* WelsFrameWork.cpp */; };
549947E8196A3FB400BA3D87 /* WelsFrameWorkEx.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 549947BD196A3FB400BA3D87 /* WelsFrameWorkEx.cpp */; };
@ -34,6 +33,7 @@
549947F3196A3FB400BA3D87 /* vaacalcfuncs.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 549947D8196A3FB400BA3D87 /* vaacalcfuncs.cpp */; };
549947F4196A3FB400BA3D87 /* vaacalculation.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 549947D9196A3FB400BA3D87 /* vaacalculation.cpp */; };
6C749B78197E2A2000A111F9 /* adaptive_quantization_aarch64_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = 6C749B77197E2A2000A111F9 /* adaptive_quantization_aarch64_neon.S */; };
F791965B19D3BF6B00F60C6B /* intra_pred_common.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F791965A19D3BF6B00F60C6B /* intra_pred_common.cpp */; };
/* End PBXBuildFile section */
/* Begin PBXCopyFilesBuildPhase section */
@ -64,7 +64,6 @@
549947AF196A3FB400BA3D87 /* vaa_calc_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = vaa_calc_neon.S; sourceTree = "<group>"; };
549947B1196A3FB400BA3D87 /* BackgroundDetection.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = BackgroundDetection.cpp; sourceTree = "<group>"; };
549947B2196A3FB400BA3D87 /* BackgroundDetection.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = BackgroundDetection.h; sourceTree = "<group>"; };
549947B4196A3FB400BA3D87 /* common.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = common.cpp; sourceTree = "<group>"; };
549947B5196A3FB400BA3D87 /* common.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = common.h; sourceTree = "<group>"; };
549947B6196A3FB400BA3D87 /* memory.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = memory.cpp; sourceTree = "<group>"; };
549947B7196A3FB400BA3D87 /* memory.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = memory.h; sourceTree = "<group>"; };
@ -97,6 +96,7 @@
549947D9196A3FB400BA3D87 /* vaacalculation.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = vaacalculation.cpp; sourceTree = "<group>"; };
549947DA196A3FB400BA3D87 /* vaacalculation.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = vaacalculation.h; sourceTree = "<group>"; };
6C749B77197E2A2000A111F9 /* adaptive_quantization_aarch64_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = adaptive_quantization_aarch64_neon.S; path = arm64/adaptive_quantization_aarch64_neon.S; sourceTree = "<group>"; };
F791965A19D3BF6B00F60C6B /* intra_pred_common.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = intra_pred_common.cpp; path = ../../../common/src/intra_pred_common.cpp; sourceTree = "<group>"; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
@ -210,7 +210,7 @@
549947B3196A3FB400BA3D87 /* common */ = {
isa = PBXGroup;
children = (
549947B4196A3FB400BA3D87 /* common.cpp */,
F791965A19D3BF6B00F60C6B /* intra_pred_common.cpp */,
549947B5196A3FB400BA3D87 /* common.h */,
549947B6196A3FB400BA3D87 /* memory.cpp */,
549947B7196A3FB400BA3D87 /* memory.h */,
@ -351,7 +351,6 @@
549947E9196A3FB400BA3D87 /* ComplexityAnalysis.cpp in Sources */,
549947E3196A3FB400BA3D87 /* vaa_calc_neon.S in Sources */,
549947EE196A3FB400BA3D87 /* imagerotate.cpp in Sources */,
549947E5196A3FB400BA3D87 /* common.cpp in Sources */,
549947EA196A3FB400BA3D87 /* denoise.cpp in Sources */,
549947E7196A3FB400BA3D87 /* WelsFrameWork.cpp in Sources */,
549947F1196A3FB400BA3D87 /* ScrollDetection.cpp in Sources */,
@ -367,6 +366,7 @@
4CC6094F197E009D00BE8B8B /* down_sample_aarch64_neon.S in Sources */,
4CC6095A1980F34F00BE8B8B /* vaa_calc_aarch64_neon.S in Sources */,
549947F2196A3FB400BA3D87 /* ScrollDetectionFuncs.cpp in Sources */,
F791965B19D3BF6B00F60C6B /* intra_pred_common.cpp in Sources */,
549947EF196A3FB400BA3D87 /* imagerotatefuncs.cpp in Sources */,
549947DF196A3FB400BA3D87 /* AdaptiveQuantization.cpp in Sources */,
549947EC196A3FB400BA3D87 /* downsample.cpp in Sources */,

View File

@ -381,6 +381,10 @@
RelativePath="..\..\..\encoder\core\src\get_intra_predictor.cpp"
>
</File>
<File
RelativePath="..\..\..\common\src\intra_pred_common.cpp"
>
</File>
<File
RelativePath="..\..\..\encoder\core\src\mc.cpp"
>
@ -959,6 +963,46 @@
/>
</FileConfiguration>
</File>
<File
RelativePath="..\..\..\common\x86\intra_pred_com.asm"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCustomBuildTool"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/x86/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
Outputs="$(IntDir)\$(InputName).obj"
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|x64"
>
<Tool
Name="VCCustomBuildTool"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/x86/ -f win64 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
Outputs="$(IntDir)\$(InputName).obj"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCustomBuildTool"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/x86/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
Outputs="$(IntDir)\$(InputName).obj"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|x64"
>
<Tool
Name="VCCustomBuildTool"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/x86/ -f win64 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
Outputs="$(IntDir)\$(InputName).obj"
/>
</FileConfiguration>
</File>
<File
RelativePath="..\..\..\encoder\core\x86\matrix_transpose.asm"
>

View File

@ -0,0 +1,83 @@
/*!
* \copy
* Copyright (c) 2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifdef HAVE_NEON
.text
#include "arm_arch_common_macro.S"
// void WelsI16x16LumaPredV_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
//
// Vertical 16x16 luma prediction: loads the 16 bytes found at (pRef - kiStride)
// and stores them 16 times back to back starting at pPred (16 bytes per row).
// Register use below assumes the usual AAPCS argument mapping of the prototype
// above (r0 = pPred, r1 = pRef, r2 = kiStride) -- confirm against the macro file.
// r3 is used as scratch: first as the source address, then as the loop counter.
WELS_ASM_FUNC_BEGIN WelsI16x16LumaPredV_neon
//Load the top line (16 bytes at r1 - r2) into 'q0' (d0, d1)
sub r3, r1, r2
vldm r3, {d0, d1}
//mov r2, #16
//Loop counter: 4 iterations x 4 stores per iteration = 16 rows
mov r3, #4
//Store the top line into each line of the MB (16*16); every store
//post-increments r0 past the 16 bytes it wrote
loop_0_get_i16x16_luma_pred_v:
vst1.8 {d0,d1}, [r0]!
vst1.8 {d0,d1}, [r0]!
vst1.8 {d0,d1}, [r0]!
vst1.8 {d0,d1}, [r0]!
subs r3, #1
bne loop_0_get_i16x16_luma_pred_v
WELS_ASM_FUNC_END
// void WelsI16x16LumaPredH_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
//
// Horizontal 16x16 luma prediction: for each of 16 rows, reads the single byte
// immediately left of the row start in the reference (pRef - 1 + row * kiStride)
// and fills the corresponding 16-byte row of pPred with that byte.
// Register use below assumes the usual AAPCS argument mapping of the prototype
// above (r0 = pPred, r1 = pRef, r2 = kiStride) -- confirm against the macro file.
WELS_ASM_FUNC_BEGIN WelsI16x16LumaPredH_neon
//stmdb sp!, {r4, lr}
//Step back one byte so r1 addresses the left-neighbour column
sub r1, r1, #1
//Loop counter: 4 iterations x 4 rows per iteration = 16 rows
mov r3, #4
loop_0_get_i16x16_luma_pred_h:
//Load one left-side byte per row, replicated to all 16 lanes;
//r1 is post-incremented by r2 after each load
vld1.8 {d0[],d1[]}, [r1], r2
vld1.8 {d2[],d3[]}, [r1], r2
vld1.8 {d4[],d5[]}, [r1], r2
vld1.8 {d6[],d7[]}, [r1], r2
//Store each replicated row; every store post-increments r0 by the
//16 bytes it wrote, so no explicit 'add r0, #16' is needed
vst1.8 {d0,d1}, [r0]!
//add r0, #16
vst1.8 {d2,d3}, [r0]!
//add r0, #16
vst1.8 {d4,d5}, [r0]!
//add r0, #16
vst1.8 {d6,d7}, [r0]!
//add r0, #16
subs r3, #1
bne loop_0_get_i16x16_luma_pred_h
WELS_ASM_FUNC_END
#endif

View File

@ -0,0 +1,55 @@
/*!
* \copy
* Copyright (c) 2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
#ifdef HAVE_NEON_AARCH64
.text
#include "arm_arch64_common_macro.S"
//for Luma 16x16
// void WelsI16x16LumaPredV_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
//
// Vertical 16x16 luma prediction: loads the 16 bytes found at (pRef - kiStride)
// and stores them 16 times back to back starting at pPred (16 bytes per row).
// Arguments per the prototype above: x0 = pPred, x1 = pRef, w2 = kiStride.
WELS_ASM_AARCH64_FUNC_BEGIN WelsI16x16LumaPredV_AArch64_neon
    // kiStride is a 32-bit int passed in w2; the upper half of x2 is not
    // guaranteed to be defined by the caller, so sign-extend it before it is
    // used in 64-bit address arithmetic.
    sxtw x2, w2
    // x3 -> the row directly above the block
    sub x3, x1, x2
    ld1 {v0.16b}, [x3]
    // Unrolled 16 times: store the row and advance x0 by 16 bytes
.rept 16
    st1 {v0.16b}, [x0], 16
.endr
WELS_ASM_AARCH64_FUNC_END
// void WelsI16x16LumaPredH_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
//
// Horizontal 16x16 luma prediction: for each of 16 rows, reads the single byte
// immediately left of the row start in the reference (pRef - 1 + row * kiStride)
// and fills the corresponding 16-byte row of pPred with that byte.
// Arguments per the prototype above: x0 = pPred, x1 = pRef, w2 = kiStride.
WELS_ASM_AARCH64_FUNC_BEGIN WelsI16x16LumaPredH_AArch64_neon
    // kiStride is a 32-bit int passed in w2; the upper half of x2 is not
    // guaranteed to be defined by the caller, so sign-extend it before it is
    // used as a 64-bit post-index increment.
    sxtw x2, w2
    // x3 -> left-neighbour byte of the first row
    sub x3, x1, #1
    // Unrolled 16 times: broadcast one left byte to all 16 lanes, step the
    // source down one row, then store the row and advance x0 by 16 bytes
.rept 16
    ld1r {v0.16b}, [x3], x2
    st1 {v0.16b}, [x0], 16
.endr
WELS_ASM_AARCH64_FUNC_END
#endif

View File

@ -0,0 +1,76 @@
/*!
* \copy
* Copyright (c) 2009-2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file intra_pred_common.h
*
* \brief interfaces for intra predictor about 16x16.
*
* \date 4/2/2014 Created
*
*************************************************************************************
*/
#ifndef INTRA_PRED_COMMON_H
#define INTRA_PRED_COMMON_H
#include "typedefs.h"
// 16x16 luma intra predictors (vertical and horizontal).
//
// All functions declared in this header share one signature:
//   pPred    - destination; written as 16 rows of 16 bytes stored back to back
//   pRef     - reference pointer; the V predictors read the 16 bytes at
//              pRef - kiStride, the H predictors read the byte just before
//              pRef on each of 16 rows spaced kiStride apart
//   kiStride - distance in bytes between consecutive rows of the reference
//
// Plain C/C++ reference implementations (declared outside the extern "C" block).
void WelsI16x16LumaPredV_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
void WelsI16x16LumaPredH_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
// The assembly implementations below are declared with C linkage.
#if defined(__cplusplus)
extern "C" {
#endif//__cplusplus
#if defined(X86_ASM)
//for intra-prediction ASM functions (x86, SSE2)
void WelsI16x16LumaPredV_sse2 (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
void WelsI16x16LumaPredH_sse2 (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
#endif//X86_ASM
#if defined(HAVE_NEON)
//ARM (32-bit) NEON implementations
void WelsI16x16LumaPredV_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
void WelsI16x16LumaPredH_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
#endif//HAVE_NEON
#if defined(HAVE_NEON_AARCH64)
//AArch64 NEON implementations
void WelsI16x16LumaPredV_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
void WelsI16x16LumaPredH_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
#endif//HAVE_NEON_AARCH64
#if defined(__cplusplus)
}
#endif//__cplusplus
#endif//INTRA_PRED_COMMON_H

View File

@ -1,6 +1,6 @@
/*!
* \copy
* Copyright (c) 2013, Cisco Systems
* Copyright (c) 2009-2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -28,12 +28,21 @@
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*
* \file intra_pred_common.cpp
*
* \brief implementation for get intra predictor about 16x16, 4x4, chroma.
*
* \date 4/2/2009 Created
* 9/14/2009 C level based optimization with high performance gained.
* [const, using ST32/ST64 to replace memset, memcpy and memmove etc.]
*
*************************************************************************************
*/
#include "common.h"
#include "ls_defines.h"
#include "cpu_core.h"
#include "intra_pred_common.h"
WELSVP_NAMESPACE_BEGIN
void WelsI16x16LumaPredV_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) {
uint8_t i = 15;
@ -66,4 +75,3 @@ void WelsI16x16LumaPredH_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStrid
} while (i-- > 0);
}
WELSVP_NAMESPACE_END

View File

@ -6,6 +6,7 @@ COMMON_CPP_SRCS=\
$(COMMON_SRCDIR)/src/crt_util_safe_x.cpp\
$(COMMON_SRCDIR)/src/deblocking_common.cpp\
$(COMMON_SRCDIR)/src/expand_pic.cpp\
$(COMMON_SRCDIR)/src/intra_pred_common.cpp\
$(COMMON_SRCDIR)/src/sad_common.cpp\
$(COMMON_SRCDIR)/src/utils.cpp\
$(COMMON_SRCDIR)/src/welsCodecTrace.cpp\
@ -17,6 +18,7 @@ COMMON_ASM_SRCS=\
$(COMMON_SRCDIR)/x86/cpuid.asm\
$(COMMON_SRCDIR)/x86/deblock.asm\
$(COMMON_SRCDIR)/x86/expand_picture.asm\
$(COMMON_SRCDIR)/x86/intra_pred_com.asm\
$(COMMON_SRCDIR)/x86/mb_copy.asm\
$(COMMON_SRCDIR)/x86/mc_chroma.asm\
$(COMMON_SRCDIR)/x86/mc_luma.asm\
@ -33,6 +35,7 @@ COMMON_ASM_ARM_SRCS=\
$(COMMON_SRCDIR)/arm/copy_mb_neon.S\
$(COMMON_SRCDIR)/arm/deblocking_neon.S\
$(COMMON_SRCDIR)/arm/expand_picture_neon.S\
$(COMMON_SRCDIR)/arm/intra_pred_common_neon.S\
$(COMMON_SRCDIR)/arm/mc_neon.S\
COMMON_OBJSARM += $(COMMON_ASM_ARM_SRCS:.S=.$(OBJ))
@ -45,6 +48,7 @@ COMMON_ASM_ARM64_SRCS=\
$(COMMON_SRCDIR)/arm64/copy_mb_aarch64_neon.S\
$(COMMON_SRCDIR)/arm64/deblocking_aarch64_neon.S\
$(COMMON_SRCDIR)/arm64/expand_picture_aarch64_neon.S\
$(COMMON_SRCDIR)/arm64/intra_pred_common_aarch64_neon.S\
$(COMMON_SRCDIR)/arm64/mc_aarch64_neon.S\
COMMON_OBJSARM64 += $(COMMON_ASM_ARM64_SRCS:.S=.$(OBJ))

View File

@ -0,0 +1,117 @@
;*!
;* \copy
;* Copyright (c) 2009-2013, Cisco Systems
;* All rights reserved.
;*
;* Redistribution and use in source and binary forms, with or without
;* modification, are permitted provided that the following conditions
;* are met:
;*
;* * Redistributions of source code must retain the above copyright
;* notice, this list of conditions and the following disclaimer.
;*
;* * Redistributions in binary form must reproduce the above copyright
;* notice, this list of conditions and the following disclaimer in
;* the documentation and/or other materials provided with the
;* distribution.
;*
;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
;* POSSIBILITY OF SUCH DAMAGE.
;*
;*
;* intra_pred_com.asm
;*
;* Abstract
;* sse2 function for intra predict operations
;*
;* History
;* 18/09/2009 Created
;*
;*
;*************************************************************************/
%include "asm_inc.asm"
;***********************************************************************
; Local Data (Read Only)
;***********************************************************************
SECTION .rodata align=16
;***********************************************************************
; Code
;***********************************************************************
; Everything after this point is executable code. Without an explicit switch
; the routines below would be assembled into the read-only data section
; selected above, which is not guaranteed to be mapped executable.
SECTION .text
;***********************************************************************
; void WelsI16x16LumaPredH_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride);
;***********************************************************************
; Emits one further row of the horizontal predictor: advance the destination
; (r0) by one 16-byte row and the source (r1) by one stride (r2), fetch the
; left-neighbour byte and store it across the whole destination row.
; SSE2_Copy16Times comes from asm_inc.asm; presumably it replicates the byte
; into all 16 lanes of the xmm register -- confirm there.
%macro SSE2_PRED_H_16X16_ONE_LINE 0
    add r0, 16
    add r1, r2
    movzx r3, byte [r1]
    SSE2_Copy16Times xmm0, r3d
    movdqa [r0], xmm0
%endmacro

; r3 is used as scratch and is saved/restored around the body; push_num
; records that one register was pushed before LOAD_3_PARA runs.
WELS_EXTERN WelsI16x16LumaPredH_sse2
    push r3
    %assign push_num 1
    LOAD_3_PARA
    SIGN_EXTENSION r2, r2d

    ; Row 0: the byte immediately left of pRef
    dec r1
    movzx r3, byte [r1]
    SSE2_Copy16Times xmm0, r3d
    movdqa [r0], xmm0

    ; Rows 1..15
%rep 15
    SSE2_PRED_H_16X16_ONE_LINE
%endrep

    pop r3
    ret
;***********************************************************************
; void WelsI16x16LumaPredV_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride);
;***********************************************************************
; Vertical predictor: read the 16 bytes one stride above pRef with an aligned
; load and write them to 16 consecutive 16-byte rows of the destination.
; No registers are pushed, hence push_num is 0 for LOAD_3_PARA.
WELS_EXTERN WelsI16x16LumaPredV_sse2
    %assign push_num 0
    LOAD_3_PARA
    SIGN_EXTENSION r2, r2d

    sub r1, r2                      ; r1 -> row directly above pRef
    movdqa xmm0, [r1]

    movdqa [r0], xmm0               ; row 0
    ; Rows 1..15 at byte offsets 16, 32, ..., 240
%assign pred_v_row_off 16
%rep 15
    movdqa [r0+pred_v_row_off], xmm0
%assign pred_v_row_off pred_v_row_off+16
%endrep
    ret

View File

@ -62,51 +62,6 @@
#endif
WELS_ASM_FUNC_BEGIN WelsI16x16LumaPredV_neon
//Get the top line data to 'q0'
sub r3, r1, r2
vldm r3, {d0, d1}
//mov r2, #16
mov r3, #4
//Set the top line to the each line of MB(16*16)
loop_0_get_i16x16_luma_pred_v:
vst1.8 {d0,d1}, [r0]!
vst1.8 {d0,d1}, [r0]!
vst1.8 {d0,d1}, [r0]!
vst1.8 {d0,d1}, [r0]!
subs r3, #1
bne loop_0_get_i16x16_luma_pred_v
WELS_ASM_FUNC_END
WELS_ASM_FUNC_BEGIN WelsI16x16LumaPredH_neon
//stmdb sp!, {r4, lr}
sub r1, r1, #1
mov r3, #4
loop_0_get_i16x16_luma_pred_h:
//Get one byte data from left side
vld1.8 {d0[],d1[]}, [r1], r2
vld1.8 {d2[],d3[]}, [r1], r2
vld1.8 {d4[],d5[]}, [r1], r2
vld1.8 {d6[],d7[]}, [r1], r2
//Set the line of MB using the left side byte data
vst1.8 {d0,d1}, [r0]!
//add r0, #16
vst1.8 {d2,d3}, [r0]!
//add r0, #16
vst1.8 {d4,d5}, [r0]!
//add r0, #16
vst1.8 {d6,d7}, [r0]!
//add r0, #16
subs r3, #1
bne loop_0_get_i16x16_luma_pred_h
WELS_ASM_FUNC_END
WELS_ASM_FUNC_BEGIN WelsI16x16LumaPredDc_neon
//stmdb sp!, { r2-r5, lr}
//Get the left vertical line data

View File

@ -349,23 +349,6 @@ WELS_ASM_AARCH64_FUNC_BEGIN WelsIChromaPredPlane_AArch64_neon
.endr
WELS_ASM_AARCH64_FUNC_END
//for Luma 16x16
WELS_ASM_AARCH64_FUNC_BEGIN WelsI16x16LumaPredV_AArch64_neon
sub x3, x1, x2
ld1 {v0.16b}, [x3]
.rept 16
st1 {v0.16b}, [x0], 16
.endr
WELS_ASM_AARCH64_FUNC_END
WELS_ASM_AARCH64_FUNC_BEGIN WelsI16x16LumaPredH_AArch64_neon
sub x3, x1, #1
.rept 16
ld1r {v0.16b}, [x3], x2
st1 {v0.16b}, [x0], 16
.endr
WELS_ASM_AARCH64_FUNC_END
WELS_ASM_AARCH64_FUNC_BEGIN WelsI16x16LumaPredDc_AArch64_neon
sub x3, x1, x2
sub x4, x1, #1

View File

@ -74,8 +74,6 @@ void WelsIChromaPredDcNA_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStrid
void WelsI16x16ChromaPredVer (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
void WelsI16x16ChromaPredHor (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
void WelsI16x16LumaPredV_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
void WelsI16x16LumaPredH_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
void WelsI16x16LumaPredPlane_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
void WelsI16x16LumaPredDc_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
void WelsI16x16LumaPredDcLeft_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
@ -94,8 +92,6 @@ void WelsFillingPred8x2to16_sse2 (uint8_t* pPred, uint8_t* pValue);
void WelsFillingPred1to16_sse2 (uint8_t* pPred, const uint8_t kuiValue);
//for intra-prediction ASM functions
void WelsI16x16LumaPredV_sse2 (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
void WelsI16x16LumaPredH_sse2 (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
void WelsI16x16LumaPredDc_sse2 (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
void WelsI16x16LumaPredPlane_sse2 (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
@ -116,8 +112,6 @@ void WelsI4x4LumaPredHU_mmx (uint8_t* pPred, uint8_t* pRef, const int32_t kiStri
#endif//X86_ASM
#if defined(HAVE_NEON)
void WelsI16x16LumaPredV_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
void WelsI16x16LumaPredH_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
void WelsI16x16LumaPredDc_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
void WelsI16x16LumaPredPlane_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
@ -137,8 +131,6 @@ void WelsIChromaPredPlane_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiS
#endif//HAVE_NEON
#if defined(HAVE_NEON_AARCH64)
void WelsI16x16LumaPredV_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
void WelsI16x16LumaPredH_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
void WelsI16x16LumaPredDc_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
void WelsI16x16LumaPredPlane_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
void WelsI16x16LumaPredDcTop_AArch64_neon (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);

View File

@ -41,6 +41,7 @@
*/
#include "ls_defines.h"
#include "cpu_core.h"
#include "intra_pred_common.h"
#include "get_intra_predictor.h"
namespace WelsEnc {
@ -538,37 +539,6 @@ void WelsIChromaPredDcNA_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStrid
}
/*!
 * \brief   Vertical 16x16 luma intra prediction (C reference).
 *
 * Reads the 16 bytes located one stride before pRef (as two 64-bit loads)
 * and writes them to each of the 16 rows of pPred; destination rows are
 * 16 bytes wide and stored back to back.
 *
 * \param   pPred     destination block, 16 rows x 16 bytes
 * \param   pRef      reference pointer; the source row is at pRef - kiStride
 * \param   kiStride  byte distance between rows of the reference
 */
void WelsI16x16LumaPredV_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) {
  const int8_t* kpTopRow = (int8_t*)&pRef[-kiStride];
  const uint64_t kuiTopLo = LD64 (kpTopRow);
  const uint64_t kuiTopHi = LD64 (kpTopRow + 8);

  uint8_t* pRow = pPred;
  for (int32_t iRow = 0; iRow < 16; ++iRow) {
    ST64 (pRow, kuiTopLo);
    ST64 (pRow + 8, kuiTopHi);
    pRow += 16;
  }
}
/*!
 * \brief   Horizontal 16x16 luma intra prediction (C reference).
 *
 * For every row of the block, takes the byte immediately before the row
 * start in the reference (pRef[row * kiStride - 1]), replicates it into a
 * 64-bit word and stores that word twice to fill the 16-byte destination row.
 * Rows are processed from the last (15) down to the first (0).
 *
 * \param   pPred     destination block, 16 rows x 16 bytes
 * \param   pRef      reference pointer to the block's first pixel
 * \param   kiStride  byte distance between rows of the reference
 */
void WelsI16x16LumaPredH_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) {
  for (int32_t iRow = 15; iRow >= 0; --iRow) {
    const uint8_t kuiLeft = pRef[iRow * kiStride - 1];
    // Multiplying by 0x0101...01 copies the byte into all eight byte lanes.
    const uint64_t kuiFill = (uint64_t) (0x0101010101010101ULL * kuiLeft);
    uint8_t* pRow = &pPred[iRow * 16];

    ST64 (pRow, kuiFill);
    ST64 (pRow + 8, kuiFill);
  }
}
}
void WelsI16x16LumaPredPlane_c (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride) {
int32_t iLTshift = 0, iTopshift = 0, iLeftshift = 0, iTopSum = 0, iLeftSum = 0;
int32_t i, j;

View File

@ -40,7 +40,7 @@
#include "sample.h"
#include "sad_common.h"
#include "intra_pred_common.h"
#include "mc.h"
#include "cpu_core.h"
@ -250,8 +250,8 @@ int32_t WelsSampleSadIntra8x8Combined3_c (uint8_t* pDecCb, int32_t iDecStride, u
}
extern void WelsI16x16LumaPredDc_c (uint8_t* pPred, uint8_t* pRef, const int32_t iStride);
extern void WelsI16x16LumaPredH_c (uint8_t* pPred, uint8_t* pRef, const int32_t iStride);
extern void WelsI16x16LumaPredV_c (uint8_t* pPred, uint8_t* pRef, const int32_t iStride);
//extern void WelsI16x16LumaPredH_c (uint8_t* pPred, uint8_t* pRef, const int32_t iStride);
//extern void WelsI16x16LumaPredV_c (uint8_t* pPred, uint8_t* pRef, const int32_t iStride);
int32_t WelsSampleSatdIntra16x16Combined3_c (uint8_t* pDec, int32_t iDecStride, uint8_t* pEnc, int32_t iEncStride,
int32_t* pBestMode, int32_t iLambda, uint8_t* pDst) {

View File

@ -306,74 +306,6 @@ get_i16x16_luma_pred_plane_sse2_1:
pop r3
ret
;***********************************************************************
; void WelsI16x16LumaPredH_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride);
;
; 16x16 luma horizontal intra prediction: for each of 16 rows, the byte at
; pRef - 1 (advanced by stride per row) is expanded to 16 bytes and stored
; to pred (advanced by 16 per row).
;
; r0 / r1 / r2 are presumably pred / pRef / stride once LOAD_3_PARA has run
; (macro defined elsewhere - confirm). SSE2_Copy16Times is also defined
; elsewhere; presumably it broadcasts the given byte value to all 16 lanes
; of the xmm register - confirm.
; Stores use movdqa, so every destination row address must be 16-byte aligned.
;***********************************************************************

; Emit one further output row: step pred by 16 bytes and pRef by one stride,
; fetch the byte now addressed by r1, expand it into xmm0 and store the row.
%macro SSE2_PRED_H_16X16_ONE_LINE 0
    add r0, 16
    add r1, r2
    movzx r3, byte [r1]
    SSE2_Copy16Times xmm0, r3d
    movdqa [r0], xmm0
%endmacro

WELS_EXTERN WelsI16x16LumaPredH_sse2
    push r3                         ; r3 is used as scratch below, restored before ret
%assign push_num 1
    LOAD_3_PARA
    SIGN_EXTENSION r2, r2d
    dec r1                          ; address the byte immediately before pRef

    ; row 0
    movzx r3, byte [r1]
    SSE2_Copy16Times xmm0, r3d
    movdqa [r0], xmm0

    ; rows 1..15 (expands to fifteen back-to-back copies of the macro body)
%rep 15
    SSE2_PRED_H_16X16_ONE_LINE
%endrep

    pop r3
    ret
;***********************************************************************
; void WelsI16x16LumaPredV_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride);
;
; 16x16 luma vertical intra prediction: the 16 bytes located one stride
; before pRef are loaded once and stored to each of the 16 rows of pred
; (rows are 16 bytes apart, 256 bytes written in total).
;
; r0 / r1 / r2 are presumably pred / pRef / stride once LOAD_3_PARA has run
; (macro defined elsewhere - confirm).
; Both the load and the stores use movdqa, so pRef - stride and pred must be
; 16-byte aligned.
;***********************************************************************
WELS_EXTERN WelsI16x16LumaPredV_sse2
%assign push_num 0
    LOAD_3_PARA
    SIGN_EXTENSION r2, r2d
    sub r1, r2                      ; r1 -> row one stride before pRef
    movdqa xmm0, [r1]

    ; row 0
    movdqa [r0], xmm0
    ; rows 1..15 at byte offsets 16, 32, ..., 240
%assign i16x16_pred_v_off 16
%rep 15
    movdqa [r0+i16x16_pred_v_off], xmm0
%assign i16x16_pred_v_off i16x16_pred_v_off+16
%endrep
%undef i16x16_pred_v_off
    ret
;***********************************************************************
; void WelsIChromaPredPlane_sse2(uint8_t *pred, uint8_t *pRef, int32_t stride);
;***********************************************************************

View File

@ -358,11 +358,11 @@
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath="..\..\src\common\common.cpp"
RelativePath="..\..\..\common\src\cpu.cpp"
>
</File>
<File
RelativePath="..\..\..\common\src\cpu.cpp"
RelativePath="..\..\..\common\src\intra_pred_common.cpp"
>
</File>
<File
@ -425,6 +425,10 @@
RelativePath="..\..\..\common\inc\cpu.h"
>
</File>
<File
RelativePath="..\..\..\common\inc\intra_pred_common.h"
>
</File>
<File
RelativePath="..\..\src\common\memory.h"
>
@ -573,6 +577,46 @@
/>
</FileConfiguration>
</File>
<File
RelativePath="..\..\..\common\x86\intra_pred_com.asm"
>
<FileConfiguration
Name="Debug|Win32"
>
<Tool
Name="VCCustomBuildTool"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/x86/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
Outputs="$(IntDir)\$(InputName).obj"
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|x64"
>
<Tool
Name="VCCustomBuildTool"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/x86/ -f win64 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
Outputs="$(IntDir)\$(InputName).obj"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
>
<Tool
Name="VCCustomBuildTool"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/x86/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
Outputs="$(IntDir)\$(InputName).obj"
/>
</FileConfiguration>
<FileConfiguration
Name="Release|x64"
>
<Tool
Name="VCCustomBuildTool"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/x86/ -f win64 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
Outputs="$(IntDir)\$(InputName).obj"
/>
</FileConfiguration>
</File>
<File
RelativePath="..\..\..\common\x86\satd_sad.asm"
>

View File

@ -38,14 +38,24 @@
*
*/
#ifndef WELSVP_SCENECHANGEDETECTIONCOMMON_H
#define WELSVP_SCENECHANGEDETECTIONCOMMON_H
#ifndef WELSVP_COMMON_H
#define WELSVP_COMMON_H
#include "util.h"
#include "memory.h"
#include "WelsFrameWork.h"
#include "IWelsVP.h"
#include "sad_common.h"
#include "intra_pred_common.h"
// Function type shared by the intra-prediction routines declared below:
// takes a destination buffer (pPred), a reference pointer (pRef) and a
// stride (kiStride), and returns nothing.
typedef void (GetIntraPred) (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
// Pointer-to-function form of GetIntraPred.
typedef GetIntraPred* GetIntraPredPtr;
// Declarations, via the function typedef, of the two 16x16 luma prediction
// routines with the GetIntraPred signature (definitions live elsewhere).
GetIntraPred WelsI16x16LumaPredV_c;
GetIntraPred WelsI16x16LumaPredH_c;
WELSVP_NAMESPACE_BEGIN
@ -56,12 +66,6 @@ typedef SadFunc* SadFuncPtr;
// Function type taking a source pointer/stride pair (pSrcY, iSrcStrideY) and
// a reference pointer/stride pair (pRefY, iRefStrideY) and returning an
// int32_t; judging by the name, the result is a 16x16 SAD - confirm against
// the implementations.
typedef int32_t (Sad16x16Func) (uint8_t* pSrcY, int32_t iSrcStrideY, uint8_t* pRefY, int32_t iRefStrideY);
// Pointer-to-function form of Sad16x16Func.
typedef Sad16x16Func* PSad16x16Func;
typedef void (GetIntraPred) (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
typedef GetIntraPred* GetIntraPredPtr;
GetIntraPred WelsI16x16LumaPredV_c;
GetIntraPred WelsI16x16LumaPredH_c;
#ifdef HAVE_NEON
WELSVP_EXTERN_C_BEGIN

View File

@ -33,10 +33,10 @@
#include "ComplexityAnalysis.h"
#include "cpu.h"
#include "macros.h"
#include "intra_pred_common.h"
WELSVP_NAMESPACE_BEGIN
///////////////////////////////////////////////////////////////////////////////////////////////////////////////
CComplexityAnalysis::CComplexityAnalysis (int32_t iCpuFlag) {
@ -280,8 +280,29 @@ CComplexityAnalysisScreen::CComplexityAnalysisScreen (int32_t iCpuFlag) {
#ifdef X86_ASM
if (iCpuFlag & WELS_CPU_SSE2) {
m_pSadFunc = WelsSampleSad16x16_sse2;
m_pIntraFunc[0] = WelsI16x16LumaPredV_sse2;
m_pIntraFunc[1] = WelsI16x16LumaPredH_sse2;
}
#endif
#if defined (HAVE_NEON)
if (iCpuFlag & WELS_CPU_NEON) {
m_pSadFunc = WelsSampleSad16x16_neon;
m_pIntraFunc[0] = WelsI16x16LumaPredV_neon;
m_pIntraFunc[1] = WelsI16x16LumaPredH_neon;
}
#endif
#if defined (HAVE_NEON_AARCH64)
if (iCpuFlag & WELS_CPU_NEON) {
m_pSadFunc = WelsSampleSad16x16_AArch64_neon;
m_pIntraFunc[0] = WelsI16x16LumaPredV_AArch64_neon;
m_pIntraFunc[1] = WelsI16x16LumaPredH_AArch64_neon;
}
#endif
}
CComplexityAnalysisScreen::~CComplexityAnalysisScreen() {

View File

@ -2,7 +2,6 @@ PROCESSING_SRCDIR=codec/processing
PROCESSING_CPP_SRCS=\
$(PROCESSING_SRCDIR)/src/adaptivequantization/AdaptiveQuantization.cpp\
$(PROCESSING_SRCDIR)/src/backgrounddetection/BackgroundDetection.cpp\
$(PROCESSING_SRCDIR)/src/common/common.cpp\
$(PROCESSING_SRCDIR)/src/common/memory.cpp\
$(PROCESSING_SRCDIR)/src/common/WelsFrameWork.cpp\
$(PROCESSING_SRCDIR)/src/common/WelsFrameWorkEx.cpp\