Neon optimization for an NS function.
Review URL: http://webrtc-codereview.appspot.com/89017 git-svn-id: http://webrtc.googlecode.com/svn/trunk@334 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
@@ -28,12 +28,14 @@ MY_WEBRTC_COMMON_DEFS += \
|
|||||||
ifeq ($(ARCH_ARM_HAVE_NEON),true)
|
ifeq ($(ARCH_ARM_HAVE_NEON),true)
|
||||||
MY_WEBRTC_COMMON_DEFS += \
|
MY_WEBRTC_COMMON_DEFS += \
|
||||||
'-DWEBRTC_ARCH_ARM_NEON'
|
'-DWEBRTC_ARCH_ARM_NEON'
|
||||||
|
CFLAGS_NEON = -flax-vector-conversions
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(ARCH_ARM_HAVE_ARMV7A),true)
|
ifeq ($(ARCH_ARM_HAVE_ARMV7A),true)
|
||||||
MY_WEBRTC_COMMON_DEFS += \
|
MY_WEBRTC_COMMON_DEFS += \
|
||||||
'-DWEBRTC_ARCH_ARM_V7A'
|
'-DWEBRTC_ARCH_ARM_V7A'
|
||||||
endif
|
endif
|
||||||
|
|
||||||
else ifeq ($(TARGET_ARCH),x86)
|
else ifeq ($(TARGET_ARCH),x86)
|
||||||
MY_WEBRTC_COMMON_DEFS += \
|
MY_WEBRTC_COMMON_DEFS += \
|
||||||
'-DWEBRTC_USE_SSE2'
|
'-DWEBRTC_USE_SSE2'
|
||||||
|
|||||||
@@ -27,6 +27,11 @@ LOCAL_SRC_FILES := \
|
|||||||
LOCAL_CFLAGS := \
|
LOCAL_CFLAGS := \
|
||||||
$(MY_WEBRTC_COMMON_DEFS)
|
$(MY_WEBRTC_COMMON_DEFS)
|
||||||
|
|
||||||
|
ifeq ($(ARCH_ARM_HAVE_NEON),true)
|
||||||
|
LOCAL_SRC_FILES += nsx_core_neon.c
|
||||||
|
LOCAL_CFLAGS += $(CFLAGS_NEON)
|
||||||
|
endif
|
||||||
|
|
||||||
LOCAL_C_INCLUDES := \
|
LOCAL_C_INCLUDES := \
|
||||||
$(LOCAL_PATH)/../interface \
|
$(LOCAL_PATH)/../interface \
|
||||||
$(LOCAL_PATH)/../../../utility \
|
$(LOCAL_PATH)/../../../utility \
|
||||||
|
|||||||
@@ -25,9 +25,9 @@ static const WebRtc_Word16 kRoundTable[16] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 25
|
|||||||
2048, 4096, 8192, 16384};
|
2048, 4096, 8192, 16384};
|
||||||
|
|
||||||
// Constants to compensate for shifting signal log(2^shifts).
|
// Constants to compensate for shifting signal log(2^shifts).
|
||||||
static const WebRtc_Word16 kLogTable[9] = {0, 177, 355, 532, 710, 887, 1065, 1242, 1420};
|
const WebRtc_Word16 WebRtcNsx_kLogTable[9] = {0, 177, 355, 532, 710, 887, 1065, 1242, 1420};
|
||||||
|
|
||||||
static const WebRtc_Word16 kCounterDiv[201] = {32767, 16384, 10923, 8192, 6554, 5461, 4681,
|
const WebRtc_Word16 WebRtcNsx_kCounterDiv[201] = {32767, 16384, 10923, 8192, 6554, 5461, 4681,
|
||||||
4096, 3641, 3277, 2979, 2731, 2521, 2341, 2185, 2048, 1928, 1820, 1725, 1638, 1560,
|
4096, 3641, 3277, 2979, 2731, 2521, 2341, 2185, 2048, 1928, 1820, 1725, 1638, 1560,
|
||||||
1489, 1425, 1365, 1311, 1260, 1214, 1170, 1130, 1092, 1057, 1024, 993, 964, 936, 910,
|
1489, 1425, 1365, 1311, 1260, 1214, 1170, 1130, 1092, 1057, 1024, 993, 964, 936, 910,
|
||||||
886, 862, 840, 819, 799, 780, 762, 745, 728, 712, 697, 683, 669, 655, 643, 630, 618,
|
886, 862, 840, 819, 799, 780, 762, 745, 728, 712, 697, 683, 669, 655, 643, 630, 618,
|
||||||
@@ -41,7 +41,7 @@ static const WebRtc_Word16 kCounterDiv[201] = {32767, 16384, 10923, 8192, 6554,
|
|||||||
189, 188, 187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174, 173,
|
189, 188, 187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174, 173,
|
||||||
172, 172, 171, 170, 169, 168, 167, 166, 165, 165, 164, 163};
|
172, 172, 171, 170, 169, 168, 167, 166, 165, 165, 164, 163};
|
||||||
|
|
||||||
static const WebRtc_Word16 kLogTableFrac[256] = {
|
const WebRtc_Word16 WebRtcNsx_kLogTableFrac[256] = {
|
||||||
0, 1, 3, 4, 6, 7, 9, 10, 11, 13, 14, 16, 17, 18, 20, 21,
|
0, 1, 3, 4, 6, 7, 9, 10, 11, 13, 14, 16, 17, 18, 20, 21,
|
||||||
22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 36, 37, 38, 40, 41, 42,
|
22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 36, 37, 38, 40, 41, 42,
|
||||||
44, 45, 46, 47, 49, 50, 51, 52, 54, 55, 56, 57, 59, 60, 61, 62,
|
44, 45, 46, 47, 49, 50, 51, 52, 54, 55, 56, 57, 59, 60, 61, 62,
|
||||||
@@ -668,6 +668,7 @@ int WebRtcNsx_set_policy_core(NsxInst_t *inst, int mode)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if !(defined(WEBRTC_ARCH_ARM_NEON) && defined(WEBRTC_ANDROID))
|
||||||
void WebRtcNsx_NoiseEstimation(NsxInst_t *inst, WebRtc_UWord16 *magn, WebRtc_UWord32 *noise,
|
void WebRtcNsx_NoiseEstimation(NsxInst_t *inst, WebRtc_UWord16 *magn, WebRtc_UWord32 *noise,
|
||||||
WebRtc_Word16 *qNoise)
|
WebRtc_Word16 *qNoise)
|
||||||
{
|
{
|
||||||
@@ -685,10 +686,10 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t *inst, WebRtc_UWord16 *magn, WebRtc_UWo
|
|||||||
tabind = inst->stages - inst->normData;
|
tabind = inst->stages - inst->normData;
|
||||||
if (tabind < 0)
|
if (tabind < 0)
|
||||||
{
|
{
|
||||||
logval = -kLogTable[-tabind];
|
logval = -WebRtcNsx_kLogTable[-tabind];
|
||||||
} else
|
} else
|
||||||
{
|
{
|
||||||
logval = kLogTable[tabind];
|
logval = WebRtcNsx_kLogTable[tabind];
|
||||||
}
|
}
|
||||||
|
|
||||||
// lmagn(i)=log(magn(i))=log(2)*log2(magn(i))
|
// lmagn(i)=log(magn(i))=log(2)*log2(magn(i))
|
||||||
@@ -702,7 +703,7 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t *inst, WebRtc_UWord16 *magn, WebRtc_UWo
|
|||||||
zeros = WebRtcSpl_NormU32((WebRtc_UWord32)magn[i]);
|
zeros = WebRtcSpl_NormU32((WebRtc_UWord32)magn[i]);
|
||||||
frac = (WebRtc_Word16)((((WebRtc_UWord32)magn[i] << zeros) & 0x7FFFFFFF) >> 23);
|
frac = (WebRtc_Word16)((((WebRtc_UWord32)magn[i] << zeros) & 0x7FFFFFFF) >> 23);
|
||||||
// log2(magn(i))
|
// log2(magn(i))
|
||||||
log2 = (WebRtc_Word16)(((31 - zeros) << 8) + kLogTableFrac[frac]);
|
log2 = (WebRtc_Word16)(((31 - zeros) << 8) + WebRtcNsx_kLogTableFrac[frac]);
|
||||||
// log2(magn(i))*log(2)
|
// log2(magn(i))*log(2)
|
||||||
lmagn[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(log2, log2Const, 15);
|
lmagn[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(log2, log2Const, 15);
|
||||||
// + log(2^stages)
|
// + log(2^stages)
|
||||||
@@ -720,7 +721,7 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t *inst, WebRtc_UWord16 *magn, WebRtc_UWo
|
|||||||
|
|
||||||
// Get counter values from state
|
// Get counter values from state
|
||||||
counter = inst->noiseEstCounter[s];
|
counter = inst->noiseEstCounter[s];
|
||||||
countDiv = kCounterDiv[counter];
|
countDiv = WebRtcNsx_kCounterDiv[counter];
|
||||||
countProd = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(counter, countDiv);
|
countProd = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(counter, countDiv);
|
||||||
|
|
||||||
// quant_est(...)
|
// quant_est(...)
|
||||||
@@ -790,6 +791,7 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t *inst, WebRtc_UWord16 *magn, WebRtc_UWo
|
|||||||
}
|
}
|
||||||
(*qNoise) = (WebRtc_Word16)inst->qNoise;
|
(*qNoise) = (WebRtc_Word16)inst->qNoise;
|
||||||
}
|
}
|
||||||
|
#endif // !(defined(WEBRTC_ARCH_ARM_NEON) && defined(WEBRTC_ANDROID))
|
||||||
|
|
||||||
// Extract thresholds for feature parameters
|
// Extract thresholds for feature parameters
|
||||||
// histograms are computed over some window_size (given by window_pars)
|
// histograms are computed over some window_size (given by window_pars)
|
||||||
@@ -1045,7 +1047,8 @@ void WebRtcNsx_ComputeSpectralFlatness(NsxInst_t *inst, WebRtc_UWord16 *magn)
|
|||||||
frac = (WebRtc_Word16)(((WebRtc_UWord32)((WebRtc_UWord32)(magn[i]) << zeros)
|
frac = (WebRtc_Word16)(((WebRtc_UWord32)((WebRtc_UWord32)(magn[i]) << zeros)
|
||||||
& 0x7FFFFFFF) >> 23);
|
& 0x7FFFFFFF) >> 23);
|
||||||
// log2(magn(i))
|
// log2(magn(i))
|
||||||
tmpU32 = (WebRtc_UWord32)(((31 - zeros) << 8) + kLogTableFrac[frac]); // Q8
|
tmpU32 = (WebRtc_UWord32)(((31 - zeros) << 8)
|
||||||
|
+ WebRtcNsx_kLogTableFrac[frac]); // Q8
|
||||||
avgSpectralFlatnessNum += tmpU32; // Q8
|
avgSpectralFlatnessNum += tmpU32; // Q8
|
||||||
} else
|
} else
|
||||||
{
|
{
|
||||||
@@ -1059,7 +1062,7 @@ void WebRtcNsx_ComputeSpectralFlatness(NsxInst_t *inst, WebRtc_UWord16 *magn)
|
|||||||
zeros = WebRtcSpl_NormU32(avgSpectralFlatnessDen);
|
zeros = WebRtcSpl_NormU32(avgSpectralFlatnessDen);
|
||||||
frac = (WebRtc_Word16)(((avgSpectralFlatnessDen << zeros) & 0x7FFFFFFF) >> 23);
|
frac = (WebRtc_Word16)(((avgSpectralFlatnessDen << zeros) & 0x7FFFFFFF) >> 23);
|
||||||
// log2(avgSpectralFlatnessDen)
|
// log2(avgSpectralFlatnessDen)
|
||||||
tmp32 = (WebRtc_Word32)(((31 - zeros) << 8) + kLogTableFrac[frac]); // Q8
|
tmp32 = (WebRtc_Word32)(((31 - zeros) << 8) + WebRtcNsx_kLogTableFrac[frac]); // Q8
|
||||||
logCurSpectralFlatness = (WebRtc_Word32)avgSpectralFlatnessNum;
|
logCurSpectralFlatness = (WebRtc_Word32)avgSpectralFlatnessNum;
|
||||||
logCurSpectralFlatness += ((WebRtc_Word32)(inst->stages - 1) << (inst->stages + 7)); // Q(8+stages-1)
|
logCurSpectralFlatness += ((WebRtc_Word32)(inst->stages - 1) << (inst->stages + 7)); // Q(8+stages-1)
|
||||||
logCurSpectralFlatness -= (tmp32 << (inst->stages - 1));
|
logCurSpectralFlatness -= (tmp32 << (inst->stages - 1));
|
||||||
@@ -1551,7 +1554,7 @@ void WebRtcNsx_DataAnalysis(NsxInst_t *inst, short *speechFrame, WebRtc_UWord16
|
|||||||
frac = (WebRtc_Word16)((((WebRtc_UWord32)magnU16[inst->anaLen2] << zeros) &
|
frac = (WebRtc_Word16)((((WebRtc_UWord32)magnU16[inst->anaLen2] << zeros) &
|
||||||
0x7FFFFFFF) >> 23); // Q8
|
0x7FFFFFFF) >> 23); // Q8
|
||||||
// log2(magnU16(i)) in Q8
|
// log2(magnU16(i)) in Q8
|
||||||
log2 = (WebRtc_Word16)(((31 - zeros) << 8) + kLogTableFrac[frac]);
|
log2 = (WebRtc_Word16)(((31 - zeros) << 8) + WebRtcNsx_kLogTableFrac[frac]);
|
||||||
}
|
}
|
||||||
|
|
||||||
sum_log_magn = (WebRtc_Word32)log2; // Q8
|
sum_log_magn = (WebRtc_Word32)log2; // Q8
|
||||||
@@ -1594,7 +1597,8 @@ void WebRtcNsx_DataAnalysis(NsxInst_t *inst, short *speechFrame, WebRtc_UWord16
|
|||||||
frac = (WebRtc_Word16)((((WebRtc_UWord32)magnU16[i] << zeros) &
|
frac = (WebRtc_Word16)((((WebRtc_UWord32)magnU16[i] << zeros) &
|
||||||
0x7FFFFFFF) >> 23);
|
0x7FFFFFFF) >> 23);
|
||||||
// log2(magnU16(i)) in Q8
|
// log2(magnU16(i)) in Q8
|
||||||
log2 = (WebRtc_Word16)(((31 - zeros) << 8) + kLogTableFrac[frac]);
|
log2 = (WebRtc_Word16)(((31 - zeros) << 8)
|
||||||
|
+ WebRtcNsx_kLogTableFrac[frac]);
|
||||||
}
|
}
|
||||||
sum_log_magn += (WebRtc_Word32)log2; // Q8
|
sum_log_magn += (WebRtc_Word32)log2; // Q8
|
||||||
// sum_log_i_log_magn in Q17
|
// sum_log_i_log_magn in Q17
|
||||||
|
|||||||
@@ -162,6 +162,18 @@ int WebRtcNsx_set_policy_core(NsxInst_t *inst, int mode);
|
|||||||
int WebRtcNsx_ProcessCore(NsxInst_t *inst, short *inFrameLow, short *inFrameHigh,
|
int WebRtcNsx_ProcessCore(NsxInst_t *inst, short *inFrameLow, short *inFrameHigh,
|
||||||
short *outFrameLow, short *outFrameHigh);
|
short *outFrameLow, short *outFrameHigh);
|
||||||
|
|
||||||
|
/****************************************************************************
|
||||||
|
* Internal functions and variable declarations shared with optimized code.
|
||||||
|
*/
|
||||||
|
void WebRtcNsx_UpdateNoiseEstimate(NsxInst_t *inst, int offset);
|
||||||
|
|
||||||
|
void WebRtcNsx_NoiseEstimation(NsxInst_t *inst, WebRtc_UWord16 *magn, WebRtc_UWord32 *noise,
|
||||||
|
WebRtc_Word16 *qNoise);
|
||||||
|
|
||||||
|
extern const WebRtc_Word16 WebRtcNsx_kLogTable[9];
|
||||||
|
extern const WebRtc_Word16 WebRtcNsx_kLogTableFrac[256];
|
||||||
|
extern const WebRtc_Word16 WebRtcNsx_kCounterDiv[201];
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
225
src/modules/audio_processing/ns/main/source/nsx_core_neon.c
Normal file
225
src/modules/audio_processing/ns/main/source/nsx_core_neon.c
Normal file
@@ -0,0 +1,225 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Use of this source code is governed by a BSD-style license
|
||||||
|
* that can be found in the LICENSE file in the root of the source
|
||||||
|
* tree. An additional intellectual property rights grant can be found
|
||||||
|
* in the file PATENTS. All contributing project authors may
|
||||||
|
* be found in the AUTHORS file in the root of the source tree.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#if defined(WEBRTC_ARCH_ARM_NEON) && defined(WEBRTC_ANDROID)
|
||||||
|
|
||||||
|
#include "nsx_core.h"
|
||||||
|
#include <arm_neon.h>
|
||||||
|
|
||||||
|
void WebRtcNsx_NoiseEstimation(NsxInst_t *inst, WebRtc_UWord16 *magn, WebRtc_UWord32 *noise,
|
||||||
|
WebRtc_Word16 *qNoise)
|
||||||
|
{
|
||||||
|
WebRtc_Word32 numerator;
|
||||||
|
|
||||||
|
WebRtc_Word16 lmagn[HALF_ANAL_BLOCKL], counter, countDiv, countProd, delta, zeros, frac;
|
||||||
|
WebRtc_Word16 log2, tabind, logval, tmp16, tmp16no1, tmp16no2;
|
||||||
|
WebRtc_Word16 log2Const = 22713;
|
||||||
|
WebRtc_Word16 widthFactor = 21845;
|
||||||
|
|
||||||
|
int i, s, offset;
|
||||||
|
|
||||||
|
numerator = FACTOR_Q16;
|
||||||
|
|
||||||
|
tabind = inst->stages - inst->normData;
|
||||||
|
if (tabind < 0)
|
||||||
|
{
|
||||||
|
logval = -WebRtcNsx_kLogTable[-tabind];
|
||||||
|
} else
|
||||||
|
{
|
||||||
|
logval = WebRtcNsx_kLogTable[tabind];
|
||||||
|
}
|
||||||
|
|
||||||
|
// lmagn(i)=log(magn(i))=log(2)*log2(magn(i))
|
||||||
|
// magn is in Q(-stages), and the real lmagn values are:
|
||||||
|
// real_lmagn(i)=log(magn(i)*2^stages)=log(magn(i))+log(2^stages)
|
||||||
|
// lmagn in Q8
|
||||||
|
for (i = 0; i < inst->magnLen; i++)
|
||||||
|
{
|
||||||
|
if (magn[i])
|
||||||
|
{
|
||||||
|
zeros = WebRtcSpl_NormU32((WebRtc_UWord32)magn[i]);
|
||||||
|
frac = (WebRtc_Word16)((((WebRtc_UWord32)magn[i] << zeros) & 0x7FFFFFFF) >> 23);
|
||||||
|
// log2(magn(i))
|
||||||
|
log2 = (WebRtc_Word16)(((31 - zeros) << 8) + WebRtcNsx_kLogTableFrac[frac]);
|
||||||
|
// log2(magn(i))*log(2)
|
||||||
|
lmagn[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(log2, log2Const, 15);
|
||||||
|
// + log(2^stages)
|
||||||
|
lmagn[i] += logval;
|
||||||
|
} else
|
||||||
|
{
|
||||||
|
lmagn[i] = logval;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int16x4_t Q3_16x4 = vdup_n_s16(3);
|
||||||
|
int16x8_t WIDTHQ8_16x8 = vdupq_n_s16(WIDTH_Q8);
|
||||||
|
int16x8_t WIDTHFACTOR_16x8 = vdupq_n_s16(widthFactor);
|
||||||
|
|
||||||
|
// Loop over simultaneous estimates
|
||||||
|
for (s = 0; s < SIMULT; s++)
|
||||||
|
{
|
||||||
|
offset = s * inst->magnLen;
|
||||||
|
|
||||||
|
// Get counter values from state
|
||||||
|
counter = inst->noiseEstCounter[s];
|
||||||
|
countDiv = WebRtcNsx_kCounterDiv[counter];
|
||||||
|
countProd = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(counter, countDiv);
|
||||||
|
|
||||||
|
// quant_est(...)
|
||||||
|
WebRtc_Word16 delta_[8];
|
||||||
|
int16x4_t tmp16x4_0;
|
||||||
|
int16x4_t tmp16x4_1;
|
||||||
|
int16x4_t countDiv_16x4 = vdup_n_s16(countDiv);
|
||||||
|
int16x8_t countProd_16x8 = vdupq_n_s16(countProd);
|
||||||
|
int16x8_t tmp16x8_0 = vdupq_n_s16(countDiv);
|
||||||
|
int16x8_t prod16x8 = vqrdmulhq_s16(WIDTHFACTOR_16x8, tmp16x8_0);
|
||||||
|
int16x8_t tmp16x8_1;
|
||||||
|
int16x8_t tmp16x8_2;
|
||||||
|
int16x8_t tmp16x8_3;
|
||||||
|
int16x8_t tmp16x8_4;
|
||||||
|
int16x8_t tmp16x8_5;
|
||||||
|
int32x4_t tmp32x4;
|
||||||
|
|
||||||
|
for (i = 0; i < inst->magnLen - 7; i += 8) {
|
||||||
|
// compute delta
|
||||||
|
tmp16x8_0 = vdupq_n_s16(FACTOR_Q7);
|
||||||
|
vst1q_s16(delta_, tmp16x8_0);
|
||||||
|
int j;
|
||||||
|
for (j = 0; j < 8; j++) {
|
||||||
|
if (inst->noiseEstDensity[offset + i + j] > 512)
|
||||||
|
delta_[j] = WebRtcSpl_DivW32W16ResW16(numerator,
|
||||||
|
inst->noiseEstDensity[offset + i + j]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update log quantile estimate
|
||||||
|
|
||||||
|
// tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14);
|
||||||
|
tmp32x4 = vmull_s16(vld1_s16(&delta_[0]), countDiv_16x4);
|
||||||
|
tmp16x4_1 = vshrn_n_s32(tmp32x4, 14);
|
||||||
|
tmp32x4 = vmull_s16(vld1_s16(&delta_[4]), countDiv_16x4);
|
||||||
|
tmp16x4_0 = vshrn_n_s32(tmp32x4, 14);
|
||||||
|
tmp16x8_0 = vcombine_s16(tmp16x4_1, tmp16x4_0); // Keep for several lines.
|
||||||
|
|
||||||
|
// prepare for the "if" branch
|
||||||
|
// tmp16 += 2;
|
||||||
|
// tmp16_1 = (Word16)(tmp16>>2);
|
||||||
|
tmp16x8_1 = vrshrq_n_s16(tmp16x8_0, 2);
|
||||||
|
|
||||||
|
// inst->noiseEstLogQuantile[offset+i] + tmp16_1;
|
||||||
|
tmp16x8_2 = vld1q_s16(&inst->noiseEstLogQuantile[offset + i]); // Keep
|
||||||
|
tmp16x8_1 = vaddq_s16(tmp16x8_2, tmp16x8_1); // Keep for several lines
|
||||||
|
|
||||||
|
// Prepare for the "else" branch
|
||||||
|
// tmp16 += 1;
|
||||||
|
// tmp16_1 = (Word16)(tmp16>>1);
|
||||||
|
tmp16x8_0 = vrshrq_n_s16(tmp16x8_0, 1);
|
||||||
|
|
||||||
|
// tmp16_2 = (Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16_1,3,1);
|
||||||
|
tmp32x4 = vmull_s16(vget_low_s16(tmp16x8_0), Q3_16x4);
|
||||||
|
tmp16x4_1 = vshrn_n_s32(tmp32x4, 1);
|
||||||
|
|
||||||
|
// tmp16_2 = (Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16_1,3,1);
|
||||||
|
tmp32x4 = vmull_s16(vget_high_s16(tmp16x8_0), Q3_16x4);
|
||||||
|
tmp16x4_0 = vshrn_n_s32(tmp32x4, 1);
|
||||||
|
|
||||||
|
// inst->noiseEstLogQuantile[offset + i] - tmp16_2;
|
||||||
|
tmp16x8_0 = vcombine_s16(tmp16x4_1, tmp16x4_0); // keep
|
||||||
|
tmp16x8_0 = vsubq_s16(tmp16x8_2, tmp16x8_0);
|
||||||
|
|
||||||
|
// Do the if-else branches:
|
||||||
|
tmp16x8_3 = vld1q_s16(&lmagn[i]); // keep for several lines
|
||||||
|
tmp16x8_5 = vsubq_s16(tmp16x8_3, tmp16x8_2);
|
||||||
|
__asm__("vcgt.s16 %q0, %q1, #0"::"w"(tmp16x8_4), "w"(tmp16x8_5));
|
||||||
|
__asm__("vbit %q0, %q1, %q2"::"w"(tmp16x8_2), "w"(tmp16x8_1), "w"(tmp16x8_4));
|
||||||
|
__asm__("vbif %q0, %q1, %q2"::"w"(tmp16x8_2), "w"(tmp16x8_0), "w"(tmp16x8_4));
|
||||||
|
vst1q_s16(&inst->noiseEstLogQuantile[offset + i], tmp16x8_2);
|
||||||
|
|
||||||
|
// Update density estimate
|
||||||
|
// tmp16_1 + tmp16_2
|
||||||
|
tmp16x8_1 = vld1q_s16(&inst->noiseEstDensity[offset + i]);
|
||||||
|
tmp16x8_0 = vqrdmulhq_s16(tmp16x8_1, countProd_16x8);
|
||||||
|
tmp16x8_0 = vaddq_s16(tmp16x8_0, prod16x8);
|
||||||
|
|
||||||
|
// lmagn[i] - inst->noiseEstLogQuantile[offset + i]
|
||||||
|
tmp16x8_3 = vsubq_s16(tmp16x8_3, tmp16x8_2);
|
||||||
|
tmp16x8_3 = vabsq_s16(tmp16x8_3);
|
||||||
|
tmp16x8_4 = vcgtq_s16(WIDTHQ8_16x8, tmp16x8_3);
|
||||||
|
__asm__("vbit %q0, %q1, %q2"::"w"(tmp16x8_1), "w"(tmp16x8_0), "w"(tmp16x8_4));
|
||||||
|
vst1q_s16(&inst->noiseEstDensity[offset + i], tmp16x8_1);
|
||||||
|
} // End loop over magnitude spectrum
|
||||||
|
|
||||||
|
for (; i < inst->magnLen; i++)
|
||||||
|
{
|
||||||
|
// compute delta
|
||||||
|
if (inst->noiseEstDensity[offset + i] > 512)
|
||||||
|
{
|
||||||
|
delta = WebRtcSpl_DivW32W16ResW16(numerator,
|
||||||
|
inst->noiseEstDensity[offset + i]);
|
||||||
|
} else
|
||||||
|
{
|
||||||
|
delta = FACTOR_Q7;
|
||||||
|
}
|
||||||
|
|
||||||
|
// update log quantile estimate
|
||||||
|
tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14);
|
||||||
|
if (lmagn[i] > inst->noiseEstLogQuantile[offset + i])
|
||||||
|
{
|
||||||
|
// +=QUANTILE*delta/(inst->counter[s]+1) QUANTILE=0.25, =1 in Q2
|
||||||
|
// CounterDiv=1/inst->counter[s] in Q15
|
||||||
|
tmp16 += 2;
|
||||||
|
tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 2);
|
||||||
|
inst->noiseEstLogQuantile[offset + i] += tmp16no1;
|
||||||
|
} else
|
||||||
|
{
|
||||||
|
tmp16 += 1;
|
||||||
|
tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 1);
|
||||||
|
// *(1-QUANTILE), in Q2 QUANTILE=0.25, 1-0.25=0.75=3 in Q2
|
||||||
|
tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, 3, 1);
|
||||||
|
inst->noiseEstLogQuantile[offset + i] -= tmp16no2;
|
||||||
|
}
|
||||||
|
|
||||||
|
// update density estimate
|
||||||
|
if (WEBRTC_SPL_ABS_W16(lmagn[i] - inst->noiseEstLogQuantile[offset + i])
|
||||||
|
< WIDTH_Q8)
|
||||||
|
{
|
||||||
|
tmp16no1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
|
||||||
|
inst->noiseEstDensity[offset + i], countProd, 15);
|
||||||
|
tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(widthFactor,
|
||||||
|
countDiv, 15);
|
||||||
|
inst->noiseEstDensity[offset + i] = tmp16no1 + tmp16no2;
|
||||||
|
}
|
||||||
|
} // end loop over magnitude spectrum
|
||||||
|
|
||||||
|
if (counter >= END_STARTUP_LONG)
|
||||||
|
{
|
||||||
|
inst->noiseEstCounter[s] = 0;
|
||||||
|
if (inst->blockIndex >= END_STARTUP_LONG)
|
||||||
|
{
|
||||||
|
WebRtcNsx_UpdateNoiseEstimate(inst, offset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
inst->noiseEstCounter[s]++;
|
||||||
|
|
||||||
|
} // end loop over simultaneous estimates
|
||||||
|
|
||||||
|
// Sequentially update the noise during startup
|
||||||
|
if (inst->blockIndex < END_STARTUP_LONG)
|
||||||
|
{
|
||||||
|
WebRtcNsx_UpdateNoiseEstimate(inst, offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < inst->magnLen; i++)
|
||||||
|
{
|
||||||
|
noise[i] = (WebRtc_UWord32)(inst->noiseEstQuantile[i]); // Q(qNoise)
|
||||||
|
}
|
||||||
|
(*qNoise) = (WebRtc_Word16)inst->qNoise;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // defined(WEBRTC_ARCH_ARM_NEON) && defined(WEBRTC_ANDROID)
|
||||||
Reference in New Issue
Block a user