Merge pull request #172 from volvet/win64_float_fix_for_enc_b

fix win64 float issue, enable AQ assembly
This commit is contained in:
Ethan Hugg 2014-01-22 17:25:03 -08:00
commit 256cd0f610
15 changed files with 2055 additions and 1609 deletions

View File

@ -53,7 +53,7 @@
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="..\..\..\encoder\plus\inc;..\..\..\encoder\core\inc;..\..\..\api\svc;..\..\..\common;..\..\..\WelsThreadLib\api"
PreprocessorDefinitions="WIN32;_DEBUG;_WINDOWS;_USRDLL;WELSENCPLUS_EXPORTS;MT_ENABLED;"
PreprocessorDefinitions="WIN32;_DEBUG;_WINDOWS;_USRDLL;WELSENCPLUS_EXPORTS;MT_ENABLED;X86_ASM"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
@ -117,6 +117,105 @@
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Debug|x64"
OutputDirectory=".\..\..\..\..\bin\win64\Debug"
IntermediateDirectory=".\..\..\..\obj\encoder\plus\Debug"
ConfigurationType="2"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
PreprocessorDefinitions="_DEBUG"
MkTypLibCompatible="true"
SuppressStartupBanner="true"
TargetEnvironment="3"
TypeLibraryName=".\..\..\..\..\..\bin\Debug/WelsEncPlus.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="..\..\..\encoder\plus\inc;..\..\..\encoder\core\inc;..\..\..\api\svc;..\..\..\common;..\..\..\WelsThreadLib\api"
PreprocessorDefinitions="WIN64;_DEBUG;_WINDOWS;_USRDLL;WELSENCPLUS_EXPORTS;MT_ENABLED;X86_ASM"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
PrecompiledHeaderFile=".\..\..\..\obj\encoder\plus\Debug/WelsEncPlus.pch"
AssemblerListingLocation=".\..\..\..\obj\encoder\plus\Debug/"
ObjectFile=".\..\..\..\obj\encoder\plus\Debug/"
ProgramDataBaseFileName=".\..\..\..\obj\encoder\plus\Debug/"
WarningLevel="3"
SuppressStartupBanner="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="_DEBUG"
Culture="1033"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="$(OutDir)\welsecore.lib"
OutputFile="$(OutDir)\welsenc.dll"
LinkIncremental="2"
SuppressStartupBanner="true"
AdditionalLibraryDirectories="..\..\..\..\libs"
ModuleDefinitionFile="..\..\..\encoder\plus\src\wels_enc_export.def"
GenerateDebugInformation="true"
ProgramDatabaseFile="$(OutDir)\welsenc.pdb"
GenerateMapFile="true"
MapFileName="$(OutDir)\welsenc.map"
RandomizedBaseAddress="1"
DataExecutionPrevention="2"
ImportLibrary="$(OutDir)\welsenc.lib"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile="$(OutDir)/WelsEncPlus.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory=".\..\..\..\..\bin\win32\Release"
@ -156,7 +255,7 @@
EnableFiberSafeOptimizations="true"
WholeProgramOptimization="true"
AdditionalIncludeDirectories="..\..\..\encoder\plus\inc;..\..\..\encoder\core\inc;..\..\..\api\svc;..\..\..\common;..\..\..\WelsThreadLib\api"
PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS;_USRDLL;WELSENCPLUS_EXPORTS;MT_ENABLED;"
PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS;_USRDLL;WELSENCPLUS_EXPORTS;MT_ENABLED;X86_ASM"
StringPooling="true"
RuntimeLibrary="2"
EnableFunctionLevelLinking="true"
@ -222,105 +321,6 @@
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Debug|x64"
OutputDirectory=".\..\..\..\..\bin\win64\Debug"
IntermediateDirectory=".\..\..\..\obj\encoder\plus\Debug"
ConfigurationType="2"
InheritedPropertySheets="$(VCInstallDir)VCProjectDefaults\UpgradeFromVC60.vsprops"
UseOfMFC="0"
ATLMinimizesCRunTimeLibraryUsage="false"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
PreprocessorDefinitions="_DEBUG"
MkTypLibCompatible="true"
SuppressStartupBanner="true"
TargetEnvironment="3"
TypeLibraryName=".\..\..\..\..\..\bin\Debug/WelsEncPlus.tlb"
HeaderFileName=""
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="..\..\..\encoder\plus\inc;..\..\..\encoder\core\inc;..\..\..\api\svc;..\..\..\common;..\..\..\WelsThreadLib\api"
PreprocessorDefinitions="WIN64;_DEBUG;_WINDOWS;_USRDLL;WELSENCPLUS_EXPORTS;MT_ENABLED"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
PrecompiledHeaderFile=".\..\..\..\obj\encoder\plus\Debug/WelsEncPlus.pch"
AssemblerListingLocation=".\..\..\..\obj\encoder\plus\Debug/"
ObjectFile=".\..\..\..\obj\encoder\plus\Debug/"
ProgramDataBaseFileName=".\..\..\..\obj\encoder\plus\Debug/"
WarningLevel="3"
SuppressStartupBanner="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
PreprocessorDefinitions="_DEBUG"
Culture="1033"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
AdditionalDependencies="$(OutDir)\welsecore.lib"
OutputFile="$(OutDir)\welsenc.dll"
LinkIncremental="2"
SuppressStartupBanner="true"
AdditionalLibraryDirectories="..\..\..\..\libs"
ModuleDefinitionFile="..\..\..\encoder\plus\src\wels_enc_export.def"
GenerateDebugInformation="true"
ProgramDatabaseFile="$(OutDir)\welsenc.pdb"
GenerateMapFile="true"
MapFileName="$(OutDir)\welsenc.map"
RandomizedBaseAddress="1"
DataExecutionPrevention="2"
ImportLibrary="$(OutDir)\welsenc.lib"
TargetMachine="17"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
SuppressStartupBanner="true"
OutputFile="$(OutDir)/WelsEncPlus.bsc"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|x64"
OutputDirectory=".\..\..\..\..\bin\win64\Release"
@ -360,7 +360,7 @@
EnableFiberSafeOptimizations="true"
WholeProgramOptimization="true"
AdditionalIncludeDirectories="..\..\..\encoder\plus\inc;..\..\..\encoder\core\inc;..\..\..\api\svc;..\..\..\common;..\..\..\WelsThreadLib\api"
PreprocessorDefinitions="WIN64;NDEBUG;_WINDOWS;_USRDLL;WELSENCPLUS_EXPORTS;MT_ENABLED;"
PreprocessorDefinitions="WIN64;NDEBUG;_WINDOWS;_USRDLL;WELSENCPLUS_EXPORTS;MT_ENABLED;X86_ASM"
StringPooling="true"
RuntimeLibrary="2"
EnableFunctionLevelLinking="true"
@ -447,7 +447,7 @@
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
Name="Debug|x64"
>
<Tool
Name="VCCLCompilerTool"
@ -456,7 +456,7 @@
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|x64"
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
@ -491,7 +491,7 @@
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
Name="Debug|x64"
>
<Tool
Name="VCCLCompilerTool"
@ -500,7 +500,7 @@
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|x64"
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
@ -531,7 +531,7 @@
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
Name="Debug|x64"
>
<Tool
Name="VCCLCompilerTool"
@ -540,7 +540,7 @@
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|x64"
Name="Release|Win32"
>
<Tool
Name="VCCLCompilerTool"
@ -589,7 +589,7 @@
/>
</FileConfiguration>
<FileConfiguration
Name="Release|Win32"
Name="Debug|x64"
>
<Tool
Name="VCResourceCompilerTool"
@ -598,7 +598,7 @@
/>
</FileConfiguration>
<FileConfiguration
Name="Debug|x64"
Name="Release|Win32"
>
<Tool
Name="VCResourceCompilerTool"

View File

@ -204,6 +204,9 @@ void WelsCPURestore (const uint32_t kuiCPU) {
}
}
void WelsXmmRegEmptyOp(void * pSrc) {
}
#endif

View File

@ -41,7 +41,7 @@
#define WELS_CPU_DETECTION_H__
#include "typedefs.h"
#include "cpu_core.h"
#if defined(__cplusplus)
@ -69,12 +69,56 @@ uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors);
*/
void WelsCPURestore (const uint32_t kuiCPU);
#ifdef WIN64
void WelsXmmRegStore(void * src);
void WelsXmmRegLoad(void * src);
#endif
#endif
void WelsXmmRegEmptyOp(void * pSrc);
#if defined(__cplusplus)
}
#endif//__cplusplus
typedef void (*WelsXmmRegProtectFunc)(void * pSrc);
#ifdef WIN64
#define XMMREG_PROTECT_DECLARE(name) \
WelsXmmRegProtectFunc name##load;\
WelsXmmRegProtectFunc name##store;\
uint8_t name##Buffer[160];
#define XMMREG_PROTECT_INIT(name) \
{ \
uint32_t uiCpuFlag = WelsCPUFeatureDetect(NULL);\
if( uiCpuFlag & WELS_CPU_SSE2 ){\
name##load = WelsXmmRegLoad;\
name##store = WelsXmmRegStore; \
} else { \
name##load = WelsXmmRegEmptyOp; \
name##store = WelsXmmRegEmptyOp; \
} \
}
#define XMMREG_PROTECT_UNINIT(name) \
#define XMMREG_PROTECT_STORE(name) \
name##store(name##Buffer);
#define XMMREG_PROTECT_LOAD(name) \
name##load(name##Buffer);
#else
#define XMMREG_PROTECT_DECLARE(name)
#define XMMREG_PROTECT_INIT(name)
#define XMMREG_PROTECT_UNINIT(name)
#define XMMREG_PROTECT_STORE(name)
#define XMMREG_PROTECT_LOAD(name)
#endif
#endif//WELS_CPU_DETECTION_H__

View File

@ -221,4 +221,43 @@ WelsEmms:
ret
%ifdef WIN64
WELS_EXTERN WelsXmmRegStore
ALIGN 16
;******************************************************************************************
; void WelsXmmRegStore(void *src)
;******************************************************************************************
WelsXmmRegStore:
movdqu [rcx], xmm6
movdqu [rcx+16], xmm7
movdqu [rcx+32], xmm8
movdqu [rcx+48], xmm9
movdqu [rcx+64], xmm10
movdqu [rcx+80], xmm11
movdqu [rcx+96], xmm12
movdqu [rcx+112], xmm13
movdqu [rcx+128], xmm14
movdqu [rcx+144], xmm15
ret
WELS_EXTERN WelsXmmRegLoad
ALIGN 16
;******************************************************************************************
; void WelsXmmRegLoad(void *src)
;******************************************************************************************
WelsXmmRegLoad:
movdqu xmm6, [rcx]
movdqu xmm7, [rcx+16]
movdqu xmm8, [rcx+32]
movdqu xmm9, [rcx+48]
movdqu xmm10, [rcx+64]
movdqu xmm11, [rcx+80]
movdqu xmm12, [rcx+96]
movdqu xmm13, [rcx+112]
movdqu xmm14, [rcx+128]
movdqu xmm15, [rcx+144]
ret
%endif

View File

@ -49,6 +49,7 @@
#include "encoder_context.h"
#include "param_svc.h"
#include "extern.h"
#include "cpu.h"
//#define OUTPUT_BIT_STREAM
//#define DUMP_SRC_PICTURE
@ -129,6 +130,8 @@ class CWelsH264SVCEncoder : public ISVCEncoder {
void InitEncoder (void);
int32_t RawData2SrcPic (const uint8_t* pSrc);
void DumpSrcPicture (const uint8_t* pSrc);
XMMREG_PROTECT_DECLARE(CWelsH264SVCEncoder);
};
}
#endif // !defined(AFX_WELSH264ENCODER_H__D9FAA1D1_5403_47E1_8E27_78F11EE65F02__INCLUDED_)

View File

@ -218,6 +218,7 @@ CWelsH264SVCEncoder::CWelsH264SVCEncoder()
#endif//OUTPUT_BIT_STREAM
InitEncoder();
XMMREG_PROTECT_INIT(CWelsH264SVCEncoder);
}
CWelsH264SVCEncoder::~CWelsH264SVCEncoder() {
@ -253,6 +254,7 @@ CWelsH264SVCEncoder::~CWelsH264SVCEncoder() {
#endif//OUTPUT_BIT_STREAM
Uninitialize();
XMMREG_PROTECT_UNINIT(CWelsH264SVCEncoder);
}
void CWelsH264SVCEncoder::InitEncoder (void) {
@ -628,7 +630,9 @@ int CWelsH264SVCEncoder::EncodeFrame2 (const SSourcePicture** pSrcPicList, int
int32_t iFrameType = videoFrameTypeInvalid;
if (nSrcPicNum > 0) {
XMMREG_PROTECT_STORE(CWelsH264SVCEncoder);
iFrameTypeReturned = WelsEncoderEncodeExt (m_pEncContext, pBsInfo, pSrcPicList, nSrcPicNum);
XMMREG_PROTECT_LOAD(CWelsH264SVCEncoder);
} else {
assert (0);
return videoFrameTypeInvalid;

View File

@ -137,7 +137,7 @@
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories=""
AdditionalIncludeDirectories="../../../common/"
PreprocessorDefinitions="WIN64;_DEBUG;_WINDOWS;_USRDLL;WELSVP_EXPORTS;X86_ASM"
MinimalRebuild="true"
BasicRuntimeChecks="3"
@ -313,6 +313,7 @@
Optimization="3"
EnableIntrinsicFunctions="false"
FavorSizeOrSpeed="1"
AdditionalIncludeDirectories="../../../common/"
PreprocessorDefinitions="WIN64;NDEBUG;_WINDOWS;_USRDLL;WELSVP_EXPORTS;X86_ASM"
RuntimeLibrary="0"
EnableFunctionLevelLinking="false"
@ -378,7 +379,7 @@
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath="..\..\src\common\cpu.cpp"
RelativePath="..\..\..\common\cpu.cpp"
>
</File>
<File
@ -496,7 +497,7 @@
>
<Tool
Name="VCCustomBuildTool"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
Outputs="$(IntDir)\$(InputName).obj"
/>
</FileConfiguration>
@ -514,7 +515,7 @@
>
<Tool
Name="VCCustomBuildTool"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
Outputs="$(IntDir)\$(InputName).obj"
/>
</FileConfiguration>
@ -545,7 +546,7 @@
>
<Tool
Name="VCCustomBuildTool"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
Outputs="$(IntDir)\$(InputName).obj"
/>
</FileConfiguration>
@ -576,7 +577,7 @@
>
<Tool
Name="VCCustomBuildTool"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
Outputs="$(IntDir)\$(InputName).obj"
/>
</FileConfiguration>
@ -594,7 +595,7 @@
>
<Tool
Name="VCCustomBuildTool"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
Outputs="$(IntDir)\$(InputName).obj"
/>
</FileConfiguration>
@ -607,7 +608,7 @@
>
<Tool
Name="VCCustomBuildTool"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
Outputs="$(IntDir)\$(InputName).obj"
/>
</FileConfiguration>
@ -616,7 +617,7 @@
>
<Tool
Name="VCCustomBuildTool"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
Outputs="$(IntDir)\$(InputName).obj"
/>
</FileConfiguration>
@ -625,7 +626,7 @@
>
<Tool
Name="VCCustomBuildTool"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
Outputs="$(IntDir)\$(InputName).obj"
/>
</FileConfiguration>
@ -634,7 +635,7 @@
>
<Tool
Name="VCCustomBuildTool"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
Outputs="$(IntDir)\$(InputName).obj"
/>
</FileConfiguration>
@ -647,7 +648,7 @@
>
<Tool
Name="VCCustomBuildTool"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
Outputs="$(IntDir)\$(InputName).obj"
/>
</FileConfiguration>
@ -656,7 +657,7 @@
>
<Tool
Name="VCCustomBuildTool"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
Outputs="$(IntDir)\$(InputName).obj"
/>
</FileConfiguration>
@ -665,7 +666,7 @@
>
<Tool
Name="VCCustomBuildTool"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
Outputs="$(IntDir)\$(InputName).obj"
/>
</FileConfiguration>
@ -674,7 +675,7 @@
>
<Tool
Name="VCCustomBuildTool"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
Outputs="$(IntDir)\$(InputName).obj"
/>
</FileConfiguration>
@ -696,7 +697,7 @@
>
<Tool
Name="VCCustomBuildTool"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
Outputs="$(IntDir)\$(InputName).obj"
/>
</FileConfiguration>
@ -714,7 +715,7 @@
>
<Tool
Name="VCCustomBuildTool"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)"
CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
Outputs="$(IntDir)\$(InputName).obj"
/>
</FileConfiguration>

View File

@ -50,9 +50,11 @@ CAdaptiveQuantization::CAdaptiveQuantization (int32_t iCpuFlag) {
m_pfVar = NULL;
WelsMemset (&m_sAdaptiveQuantParam, 0, sizeof (m_sAdaptiveQuantParam));
WelsInitVarFunc (m_pfVar, m_CPUFlag);
XMMREG_PROTECT_INIT(AdaptiveQuantization);
}
CAdaptiveQuantization::~CAdaptiveQuantization() {
XMMREG_PROTECT_UNINIT(AdaptiveQuantization);
}
EResult CAdaptiveQuantization::Process (int32_t iType, SPixMap* pSrcPixMap, SPixMap* pRefPixMap) {
@ -101,6 +103,7 @@ EResult CAdaptiveQuantization::Process (int32_t iType, SPixMap* pSrcPixMap, SPix
pRefFrameTmp = pRefFrameY;
pCurFrameTmp = pCurFrameY;
for (i = 0; i < iMbWidth; i++) {
XMMREG_PROTECT_STORE(AdaptiveQuantization);
iSumDiff = pVaaCalcResults->pSad8x8[iMbIndex][0];
iSumDiff += pVaaCalcResults->pSad8x8[iMbIndex][1];
iSumDiff += pVaaCalcResults->pSad8x8[iMbIndex][2];
@ -109,6 +112,7 @@ EResult CAdaptiveQuantization::Process (int32_t iType, SPixMap* pSrcPixMap, SPix
iSQDiff = pVaaCalcResults->pSsd16x16[iMbIndex];
uiSum = pVaaCalcResults->pSum16x16[iMbIndex];
iSQSum = pVaaCalcResults->pSumOfSquare16x16[iMbIndex];
XMMREG_PROTECT_LOAD(AdaptiveQuantization);
iSumDiff = iSumDiff >> 8;
pMotionTexture->uiMotionIndex = (iSQDiff >> 8) - (iSumDiff * iSumDiff);
@ -131,7 +135,9 @@ EResult CAdaptiveQuantization::Process (int32_t iType, SPixMap* pSrcPixMap, SPix
pRefFrameTmp = pRefFrameY;
pCurFrameTmp = pCurFrameY;
for (i = 0; i < iMbWidth; i++) {
XMMREG_PROTECT_STORE(AdaptiveQuantization);
m_pfVar (pRefFrameTmp, iRefStride, pCurFrameTmp, iCurStride, pMotionTexture);
XMMREG_PROTECT_LOAD(AdaptiveQuantization);
dAverageMotionIndex += pMotionTexture->uiMotionIndex;
dAverageTextureIndex += pMotionTexture->uiTextureIndex;
pMotionTexture++;
@ -223,7 +229,7 @@ void CAdaptiveQuantization::WelsInitVarFunc (PVarFunc& pfVar, int32_t iCpuFlag)
#ifdef X86_ASM
if (iCpuFlag & WELS_CPU_SSE2) {
// pfVar = SampleVariance16x16_sse2;
pfVar = SampleVariance16x16_sse2;
}
#endif
}

View File

@ -45,6 +45,7 @@
#include "../common/memory.h"
#include "../common/WelsFrameWork.h"
#include "../../interface/IWelsVP.h"
#include "cpu.h"
WELSVP_NAMESPACE_BEGIN
@ -78,6 +79,7 @@ class CAdaptiveQuantization : public IStrategy {
PVarFunc m_pfVar;
int32_t m_CPUFlag;
SAdaptiveQuantizationParam m_sAdaptiveQuantParam;
XMMREG_PROTECT_DECLARE(AdaptiveQuantization);
};
WELSVP_NAMESPACE_END

2995
codec/processing/src/asm/vaa.asm Normal file → Executable file

File diff suppressed because it is too large Load Diff

View File

@ -1,196 +0,0 @@
/*!
* \copy
* Copyright (c) 2009-2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* \file cpu.c
*
* \brief CPU compatibility detection
*
* \date 04/29/2009 Created
*
*************************************************************************************
*/
#include "util.h"
#include "cpu.h"
WELSVP_NAMESPACE_BEGIN
#define CPU_Vender_AMD "AuthenticAMD"
#define CPU_Vender_INTEL "GenuineIntel"
#define CPU_Vender_CYRIX "CyrixInstead"
#if defined(X86_ASM)
uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
uint32_t uiCPU = 0;
uint32_t uiFeatureA = 0, uiFeatureB = 0, uiFeatureC = 0, uiFeatureD = 0;
int32_t CacheLineSize = 0;
int8_t chVenderName[16] = { 0 };
if (!WelsCPUIdVerify()) {
/* cpuid is not supported in cpu */
return 0;
}
WelsCPUId (0, &uiFeatureA, (uint32_t*)&chVenderName[0], (uint32_t*)&chVenderName[8], (uint32_t*)&chVenderName[4]);
if (uiFeatureA == 0) {
/* maximum input value for basic cpuid information */
return 0;
}
WelsCPUId (1, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
if ((uiFeatureD & 0x00800000) == 0) {
/* Basic MMX technology is not support in cpu, mean nothing for us so return here */
return 0;
}
uiCPU = WELS_CPU_MMX;
if (uiFeatureD & 0x02000000) {
/* SSE technology is identical to AMD MMX extensions */
uiCPU |= WELS_CPU_MMXEXT | WELS_CPU_SSE;
}
if (uiFeatureD & 0x04000000) {
/* SSE2 support here */
uiCPU |= WELS_CPU_SSE2;
}
if (uiFeatureD & 0x00000001) {
/* x87 FPU on-chip checking */
uiCPU |= WELS_CPU_FPU;
}
if (uiFeatureD & 0x00008000) {
/* CMOV instruction checking */
uiCPU |= WELS_CPU_CMOV;
}
if (!strcmp ((const str_t*)chVenderName, CPU_Vender_INTEL)) { // confirmed_safe_unsafe_usage
if (uiFeatureD & 0x10000000) {
/* Multi-Threading checking: contains of multiple logic processors */
uiCPU |= WELS_CPU_HTT;
}
}
if (uiFeatureC & 0x00000001) {
/* SSE3 support here */
uiCPU |= WELS_CPU_SSE3;
}
if (uiFeatureC & 0x00000200) {
/* SSSE3 support here */
uiCPU |= WELS_CPU_SSSE3;
}
if (uiFeatureC & 0x00080000) {
/* SSE4.1 support here, 45nm Penryn processor */
uiCPU |= WELS_CPU_SSE41;
}
if (uiFeatureC & 0x00100000) {
/* SSE4.2 support here, next generation Nehalem processor */
uiCPU |= WELS_CPU_SSE42;
}
if (WelsCPUSupportAVX (uiFeatureA, uiFeatureC)) { //
/* AVX supported */
uiCPU |= WELS_CPU_AVX;
}
if (WelsCPUSupportFMA (uiFeatureA, uiFeatureC)) { //
/* AVX FMA supported */
uiCPU |= WELS_CPU_FMA;
}
if (uiFeatureC & 0x02000000) {
/* AES checking */
uiCPU |= WELS_CPU_AES;
}
if (uiFeatureC & 0x00400000) {
/* MOVBE checking */
uiCPU |= WELS_CPU_MOVBE;
}
if (pNumberOfLogicProcessors != NULL) {
// HTT enabled on chip
*pNumberOfLogicProcessors = (uiFeatureB & 0x00ff0000) >> 16; // feature bits: 23-16 on returned EBX
}
WelsCPUId (0x80000000, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
if ((!strcmp ((const str_t*)chVenderName, CPU_Vender_AMD))
&& (uiFeatureA >= 0x80000001)) { // confirmed_safe_unsafe_usage
WelsCPUId (0x80000001, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
if (uiFeatureD & 0x00400000) {
uiCPU |= WELS_CPU_MMXEXT;
}
if (uiFeatureD & 0x80000000) {
uiCPU |= WELS_CPU_3DNOW;
}
}
if (!strcmp ((const str_t*)chVenderName, CPU_Vender_INTEL)) { // confirmed_safe_unsafe_usage
int32_t family, model;
WelsCPUId (1, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
family = ((uiFeatureA >> 8) & 0xf) + ((uiFeatureA >> 20) & 0xff);
model = ((uiFeatureA >> 4) & 0xf) + ((uiFeatureA >> 12) & 0xf0);
if ((family == 6) && (model == 9 || model == 13 || model == 14)) {
uiCPU &= ~ (WELS_CPU_SSE2 | WELS_CPU_SSE3);
}
}
// get cache line size
if ((!strcmp ((const str_t*)chVenderName, CPU_Vender_INTEL))
|| ! (strcmp ((const str_t*)chVenderName, CPU_Vender_CYRIX))) { // confirmed_safe_unsafe_usage
WelsCPUId (1, &uiFeatureA, &uiFeatureB, &uiFeatureC, &uiFeatureD);
CacheLineSize = (uiFeatureB & 0xff00) >>
5; // ((clflush_line_size >> 8) << 3), CLFLUSH_line_size * 8 = CacheLineSize_in_byte
if (CacheLineSize == 128) {
uiCPU |= WELS_CPU_CACHELINE_128;
} else if (CacheLineSize == 64) {
uiCPU |= WELS_CPU_CACHELINE_64;
} else if (CacheLineSize == 32) {
uiCPU |= WELS_CPU_CACHELINE_32;
} else if (CacheLineSize == 16) {
uiCPU |= WELS_CPU_CACHELINE_16;
}
}
return uiCPU;
}
void WelsCPURestore (const uint32_t kuiCPU) {
if (kuiCPU & (WELS_CPU_MMX | WELS_CPU_MMXEXT | WELS_CPU_3DNOW | WELS_CPU_3DNOWEXT)) {
WelsEmms();
}
}
#endif
WELSVP_NAMESPACE_END

View File

@ -1,102 +0,0 @@
/*!
* \copy
* Copyright (c) 2009-2013, Cisco Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* \file cpu.h
*
* \brief CPU feature compatibility detection
*
* \date 04/29/2009 Created
*
*************************************************************************************
*/
#ifndef WELSVP_CPU_H
#define WELSVP_CPU_H
#include "typedef.h"
WELSVP_NAMESPACE_BEGIN
/*
* WELS CPU feature flags
*/
#define WELS_CPU_MMX 0x00000001 /* mmx */
#define WELS_CPU_MMXEXT 0x00000002 /* mmx-ext*/
#define WELS_CPU_SSE 0x00000004 /* sse */
#define WELS_CPU_SSE2 0x00000008 /* sse 2 */
#define WELS_CPU_SSE3 0x00000010 /* sse 3 */
#define WELS_CPU_SSE41 0x00000020 /* sse 4.1 */
#define WELS_CPU_3DNOW 0x00000040 /* 3dnow! */
#define WELS_CPU_3DNOWEXT 0x00000080 /* 3dnow! ext */
#define WELS_CPU_ALTIVEC 0x00000100 /* altivec */
#define WELS_CPU_SSSE3 0x00000200 /* ssse3 */
#define WELS_CPU_SSE42 0x00000400 /* sse 4.2 */
/* CPU features application extensive */
#define WELS_CPU_AVX 0x00000800 /* Advanced Vector eXtentions */
#define WELS_CPU_FPU 0x00001000 /* x87-FPU on chip */
#define WELS_CPU_HTT 0x00002000 /* Hyper-Threading Technology (HTT), Multi-threading enabled feature:
physical processor package is capable of supporting more than one logic processor
*/
#define WELS_CPU_CMOV 0x00004000 /* Conditional Move Instructions,
also if x87-FPU is present at indicated by the CPUID.FPU feature bit, then FCOMI and FCMOV are supported
*/
#define WELS_CPU_MOVBE 0x00008000 /* MOVBE instruction */
#define WELS_CPU_AES 0x00010000 /* AES instruction extensions */
#define WELS_CPU_FMA 0x00020000 /* AVX VEX FMA instruction sets */
#define WELS_CPU_CACHELINE_16 0x10000000 /* CacheLine Size 16 */
#define WELS_CPU_CACHELINE_32 0x20000000 /* CacheLine Size 32 */
#define WELS_CPU_CACHELINE_64 0x40000000 /* CacheLine Size 64 */
#define WELS_CPU_CACHELINE_128 0x80000000 /* CacheLine Size 128 */
/*
* Interfaces for CPU core feature detection as below
*/
#ifdef X86_ASM
WELSVP_EXTERN_C_BEGIN
int32_t WelsCPUIdVerify();
void WelsCPUId (uint32_t uiIndex, uint32_t* pFeatureA, uint32_t* pFeatureB, uint32_t* pFeatureC, uint32_t* pFeatureD);
int32_t WelsCPUSupportAVX (uint32_t eax, uint32_t ecx);
int32_t WelsCPUSupportFMA (uint32_t eax, uint32_t ecx);
void WelsEmms();
WELSVP_EXTERN_C_END
#endif
uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors);
WELSVP_NAMESPACE_END
#endif

View File

@ -58,11 +58,11 @@ void CVAACalculation::InitVaaFuncs (SVaaFuncs& sVaaFuncs, int32_t iCpuFlag) {
sVaaFuncs.pfVAACalcSadVar = VAACalcSadVar_c;
#ifdef X86_ASM
if ((iCpuFlag & WELS_CPU_SSE2) == WELS_CPU_SSE2) {
/* sVaaFuncs.pfVAACalcSad = VAACalcSad_sse2;
sVaaFuncs.pfVAACalcSadBgd = VAACalcSadBgd_sse2;
sVaaFuncs.pfVAACalcSadSsd = VAACalcSadSsd_sse2;
sVaaFuncs.pfVAACalcSadSsdBgd = VAACalcSadSsdBgd_sse2;
sVaaFuncs.pfVAACalcSadVar = VAACalcSadVar_sse2;*/
sVaaFuncs.pfVAACalcSad = VAACalcSad_sse2;
sVaaFuncs.pfVAACalcSadBgd = VAACalcSadBgd_sse2;
sVaaFuncs.pfVAACalcSadSsd = VAACalcSadSsd_sse2;
sVaaFuncs.pfVAACalcSadSsdBgd = VAACalcSadSsdBgd_sse2;
sVaaFuncs.pfVAACalcSadVar = VAACalcSadVar_sse2;
}
#endif//X86_ASM
}

View File

@ -3,7 +3,6 @@ PROCESSING_SRCDIR=codec/processing
PROCESSING_CPP_SRCS=\
$(PROCESSING_SRCDIR)/./src/adaptivequantization/AdaptiveQuantization.cpp\
$(PROCESSING_SRCDIR)/./src/backgounddetection/BackgroundDetection.cpp\
$(PROCESSING_SRCDIR)/./src/common/cpu.cpp\
$(PROCESSING_SRCDIR)/./src/common/memory.cpp\
$(PROCESSING_SRCDIR)/./src/common/thread.cpp\
$(PROCESSING_SRCDIR)/./src/common/util.cpp\