Refactored ARM specific code in Noise Suppression. Bit exact.
Review URL: https://webrtc-codereview.appspot.com/459006 git-svn-id: http://webrtc.googlecode.com/svn/trunk@2303 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
parent
1755a57cbc
commit
0d321da7e1
46
src/build/generate_asm_header.py
Normal file
46
src/build/generate_asm_header.py
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
#
|
||||||
|
# Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||||
|
#
|
||||||
|
# Use of this source code is governed by a BSD-style license
|
||||||
|
# that can be found in the LICENSE file in the root of the source
|
||||||
|
# tree. An additional intellectual property rights grant can be found
|
||||||
|
# in the file PATENTS. All contributing project authors may
|
||||||
|
# be found in the AUTHORS file in the root of the source tree.
|
||||||
|
|
||||||
|
"""This script generates a C header file of offsets from an ARM assembler file.
|
||||||
|
|
||||||
|
It parses an ARM assembler generated .S file, finds declarations of variables
|
||||||
|
whose names start with the string specified as the third argument in the
|
||||||
|
command-line, translates the variable names and values into constant defines and
|
||||||
|
writes them into a header file.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
|
||||||
|
def usage():
  """Writes the expected command-line syntax to stderr and exits with status 1.

  The message is an error report, so it goes to stderr rather than stdout;
  this keeps it visible when the build captures or discards stdout.
  """
  sys.stderr.write("Usage: generate_asm_header.py " +
                   "<input filename> <output filename> <variable name pattern>\n")
  sys.exit(1)
|
||||||
|
|
||||||
|
def main(argv):
  """Translates matching assembler variable declarations into C defines.

  Args:
    argv: A list of exactly three strings: the input (.S) filename, the output
        (header) filename, and the prefix that the variable names of interest
        start with. Any other length prints the usage and exits.
  """
  if len(argv) != 3:
    usage()

  input_filename, output_filename, name_pattern = argv

  # The 'with' blocks guarantee that both files are closed (and the output is
  # flushed) even if reading or writing raises part-way through.
  with open(input_filename) as infile:
    with open(output_filename, 'w') as outfile:
      for line in infile:  # Iterate through all the lines in the input file.
        if line.startswith(name_pattern):
          outfile.write('#define ')
          outfile.write(line.split(':')[0])  # Write the constant name.
          outfile.write(' ')

        # Checked independently of the name test above: the value may sit on
        # a different line than the label that names it.
        if '.word' in line:
          outfile.write(line.split('.word')[1])  # Write the constant value.
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main(sys.argv[1:])
|
@ -23,7 +23,7 @@ LOCAL_SRC_FILES := \
|
|||||||
nsx_core.c
|
nsx_core.c
|
||||||
|
|
||||||
# Files for floating point.
|
# Files for floating point.
|
||||||
# noise_suppression.c ns_core.c
|
# noise_suppression.c ns_core.c
|
||||||
|
|
||||||
# Flags passed to both C and C++ files.
|
# Flags passed to both C and C++ files.
|
||||||
LOCAL_CFLAGS := $(MY_WEBRTC_COMMON_DEFS)
|
LOCAL_CFLAGS := $(MY_WEBRTC_COMMON_DEFS)
|
||||||
@ -57,8 +57,20 @@ LOCAL_ARM_MODE := arm
|
|||||||
LOCAL_MODULE_CLASS := STATIC_LIBRARIES
|
LOCAL_MODULE_CLASS := STATIC_LIBRARIES
|
||||||
LOCAL_MODULE := libwebrtc_ns_neon
|
LOCAL_MODULE := libwebrtc_ns_neon
|
||||||
LOCAL_MODULE_TAGS := optional
|
LOCAL_MODULE_TAGS := optional
|
||||||
|
GEN := $(LOCAL_PATH)/nsx_core_neon_offsets.h
|
||||||
|
|
||||||
LOCAL_SRC_FILES := nsx_core_neon.c
|
# Generate a header file nsx_core_neon_offsets.h which will be included in
|
||||||
|
# assembly file nsx_core_neon.S, from file nsx_core_neon_offsets.c.
|
||||||
|
$(GEN): $(LOCAL_PATH)/../../../../src/build/generate_asm_header.py \
|
||||||
|
$(intermediates)/nsx_core_neon_offsets.S
|
||||||
|
@python $^ $@ offset_nsx_
|
||||||
|
|
||||||
|
$(intermediates)/nsx_core_neon_offsets.S: $(LOCAL_PATH)/nsx_core_neon_offsets.c
|
||||||
|
@$(TARGET_CC) $(addprefix -I, $(LOCAL_INCLUDES)) $(addprefix -isystem ,\
|
||||||
|
$(TARGET_C_INCLUDES)) -S -o $@ $^
|
||||||
|
|
||||||
|
LOCAL_GENERATED_SOURCES := $(GEN)
|
||||||
|
LOCAL_SRC_FILES := nsx_core_neon.S
|
||||||
|
|
||||||
# Flags passed to both C and C++ files.
|
# Flags passed to both C and C++ files.
|
||||||
LOCAL_CFLAGS := \
|
LOCAL_CFLAGS := \
|
||||||
@ -72,6 +84,8 @@ LOCAL_C_INCLUDES := \
|
|||||||
$(LOCAL_PATH)/../../.. \
|
$(LOCAL_PATH)/../../.. \
|
||||||
$(LOCAL_PATH)/../../../common_audio/signal_processing/include
|
$(LOCAL_PATH)/../../../common_audio/signal_processing/include
|
||||||
|
|
||||||
|
LOCAL_INCLUDES := $(LOCAL_C_INCLUDES)
|
||||||
|
|
||||||
ifndef NDK_ROOT
|
ifndef NDK_ROOT
|
||||||
include external/stlport/libstlport.mk
|
include external/stlport/libstlport.mk
|
||||||
endif
|
endif
|
||||||
|
@ -435,6 +435,18 @@ AnalysisUpdate WebRtcNsx_AnalysisUpdate;
|
|||||||
Denormalize WebRtcNsx_Denormalize;
|
Denormalize WebRtcNsx_Denormalize;
|
||||||
CreateComplexBuffer WebRtcNsx_CreateComplexBuffer;
|
CreateComplexBuffer WebRtcNsx_CreateComplexBuffer;
|
||||||
|
|
||||||
|
#if (defined WEBRTC_DETECT_ARM_NEON || defined WEBRTC_ARCH_ARM_NEON)
|
||||||
|
// Initialize function pointers for ARM Neon platform.
// Points each of the six WebRtcNsx_* function pointers assigned below at its
// WebRtcNsx_*Neon counterpart. Only compiled when WEBRTC_DETECT_ARM_NEON or
// WEBRTC_ARCH_ARM_NEON is defined.
|
||||||
|
static void WebRtcNsx_InitNeon(void) {
|
||||||
|
WebRtcNsx_NoiseEstimation = WebRtcNsx_NoiseEstimationNeon;
|
||||||
|
WebRtcNsx_PrepareSpectrum = WebRtcNsx_PrepareSpectrumNeon;
|
||||||
|
WebRtcNsx_SynthesisUpdate = WebRtcNsx_SynthesisUpdateNeon;
|
||||||
|
WebRtcNsx_AnalysisUpdate = WebRtcNsx_AnalysisUpdateNeon;
|
||||||
|
WebRtcNsx_Denormalize = WebRtcNsx_DenormalizeNeon;
|
||||||
|
WebRtcNsx_CreateComplexBuffer = WebRtcNsx_CreateComplexBufferNeon;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
// Update the noise estimation information.
|
// Update the noise estimation information.
|
||||||
static void UpdateNoiseEstimate(NsxInst_t* inst, int offset) {
|
static void UpdateNoiseEstimate(NsxInst_t* inst, int offset) {
|
||||||
WebRtc_Word32 tmp32no1 = 0;
|
WebRtc_Word32 tmp32no1 = 0;
|
||||||
@ -1881,8 +1893,11 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram
|
|||||||
int q_domain_to_use = 0;
|
int q_domain_to_use = 0;
|
||||||
|
|
||||||
// Code for ARMv7-Neon platform assumes the following:
|
// Code for ARMv7-Neon platform assumes the following:
|
||||||
|
assert(inst->anaLen > 0);
|
||||||
|
assert(inst->anaLen2 > 0);
|
||||||
assert(inst->anaLen % 16 == 0);
|
assert(inst->anaLen % 16 == 0);
|
||||||
assert(inst->anaLen2 % 8 == 0);
|
assert(inst->anaLen2 % 8 == 0);
|
||||||
|
assert(inst->blockLen10ms > 0);
|
||||||
assert(inst->blockLen10ms % 16 == 0);
|
assert(inst->blockLen10ms % 16 == 0);
|
||||||
assert(inst->magnLen == inst->anaLen2 + 1);
|
assert(inst->magnLen == inst->anaLen2 + 1);
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||||
*
|
*
|
||||||
* Use of this source code is governed by a BSD-style license
|
* Use of this source code is governed by a BSD-style license
|
||||||
* that can be found in the LICENSE file in the root of the source
|
* that can be found in the LICENSE file in the root of the source
|
||||||
@ -206,10 +206,26 @@ typedef void (*CreateComplexBuffer)(NsxInst_t* inst,
|
|||||||
int16_t* out);
|
int16_t* out);
|
||||||
extern CreateComplexBuffer WebRtcNsx_CreateComplexBuffer;
|
extern CreateComplexBuffer WebRtcNsx_CreateComplexBuffer;
|
||||||
|
|
||||||
/****************************************************************************
|
#if (defined WEBRTC_DETECT_ARM_NEON) || defined (WEBRTC_ARCH_ARM_NEON)
|
||||||
* Initialization of the above function pointers for ARM Neon.
|
// For the above function pointers, functions for generic platforms are declared
|
||||||
*/
|
// and defined as static in file nsx_core.c, while those for ARM Neon platforms
|
||||||
void WebRtcNsx_InitNeon(void);
|
// are declared below and defined in file nsx_core_neon.S.
|
||||||
|
void WebRtcNsx_NoiseEstimationNeon(NsxInst_t* inst,
|
||||||
|
uint16_t* magn,
|
||||||
|
uint32_t* noise,
|
||||||
|
int16_t* q_noise);
|
||||||
|
void WebRtcNsx_CreateComplexBufferNeon(NsxInst_t* inst,
|
||||||
|
int16_t* in,
|
||||||
|
int16_t* out);
|
||||||
|
void WebRtcNsx_SynthesisUpdateNeon(NsxInst_t* inst,
|
||||||
|
int16_t* out_frame,
|
||||||
|
int16_t gain_factor);
|
||||||
|
void WebRtcNsx_AnalysisUpdateNeon(NsxInst_t* inst,
|
||||||
|
int16_t* out,
|
||||||
|
int16_t* new_speech);
|
||||||
|
void WebRtcNsx_DenormalizeNeon(NsxInst_t* inst, int16_t* in, int factor);
|
||||||
|
void WebRtcNsx_PrepareSpectrumNeon(NsxInst_t* inst, int16_t* freq_buff);
|
||||||
|
#endif
|
||||||
|
|
||||||
extern const WebRtc_Word16 WebRtcNsx_kLogTable[9];
|
extern const WebRtc_Word16 WebRtcNsx_kLogTable[9];
|
||||||
extern const WebRtc_Word16 WebRtcNsx_kLogTableFrac[256];
|
extern const WebRtc_Word16 WebRtcNsx_kLogTableFrac[256];
|
||||||
|
682
src/modules/audio_processing/ns/nsx_core_neon.S
Normal file
682
src/modules/audio_processing/ns/nsx_core_neon.S
Normal file
@ -0,0 +1,682 @@
|
|||||||
|
@
|
||||||
|
@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||||
|
@
|
||||||
|
@ Use of this source code is governed by a BSD-style license
|
||||||
|
@ that can be found in the LICENSE file in the root of the source
|
||||||
|
@ tree. An additional intellectual property rights grant can be found
|
||||||
|
@ in the file PATENTS. All contributing project authors may
|
||||||
|
@ be found in the AUTHORS file in the root of the source tree.
|
||||||
|
@
|
||||||
|
|
||||||
|
@ nsx_core_neon.S
|
||||||
|
@ This file contains some functions in NS, optimized for ARM Neon
|
||||||
|
@ platforms. Reference C code is in file nsx_core.c. Bit-exact.
|
||||||
|
|
||||||
|
.arch armv7-a
|
||||||
|
.fpu neon
|
||||||
|
|
||||||
|
#include "nsx_defines.h"
|
||||||
|
#include "nsx_core_neon_offsets.h"
|
||||||
|
|
||||||
|
.global WebRtcNsx_NoiseEstimationNeon
|
||||||
|
.global WebRtcNsx_PrepareSpectrumNeon
|
||||||
|
.global WebRtcNsx_SynthesisUpdateNeon
|
||||||
|
.global WebRtcNsx_AnalysisUpdateNeon
|
||||||
|
.global WebRtcNsx_DenormalizeNeon
|
||||||
|
.global WebRtcNsx_CreateComplexBufferNeon
|
||||||
|
|
||||||
|
@ void NoiseEstimationNeon(NsxInst_t* inst,
|
||||||
|
@ uint16_t* magn,
|
||||||
|
@ uint32_t* noise,
|
||||||
|
@ int16_t* q_noise);
|
||||||
|
|
||||||
|
@ Register usage (across major loops of NoiseEstimationNeon()):
|
||||||
|
@ r0-r3: function arguments, and scratch registers.
|
||||||
|
@ r4: &inst
|
||||||
|
@ r5: &noiseEstLogQuantile[]
|
||||||
|
@ r6: inst->magnLen
|
||||||
|
@ r7: offset
|
||||||
|
@ r8: s, the loop counter for the LOOP_SIMULT
|
||||||
|
@ r9: &inst->noiseEstDensity[]
|
||||||
|
@ r10: &inst->noiseEstCounter[]
|
||||||
|
@ r11: countDiv
|
||||||
|
@ r12: i, the loop counter for LOOP_NOISEESTIMATION_MAGNLEN_INNER
|
||||||
|
|
||||||
|
WebRtcNsx_NoiseEstimationNeon:
|
||||||
|
.fnstart
|
||||||
|
.save {r4-r11, r14}
|
||||||
|
.vsave {d8-d15}
|
||||||
|
.pad #(16 + (HALF_ANAL_BLOCKL + 3) / 4 * 8)
|
||||||
|
|
||||||
|
push {r4-r11, r14}
|
||||||
|
vpush {d8-d15}
|
||||||
|
sub sp, #(16 + (HALF_ANAL_BLOCKL + 3) / 4 * 8)
|
||||||
|
|
||||||
|
@ [sp, #0]: logval
|
||||||
|
@ [sp, #4]: noise
|
||||||
|
@ [sp, #8]: q_noise
|
||||||
|
@ [sp, #12]: factor
|
||||||
|
@ [sp, #16 ~ #(16 + (HALF_ANAL_BLOCKL + 3) / 4 * 8)]: lmagn[HALF_ANAL_BLOCKL]
|
||||||
|
|
||||||
|
str r2, [sp, #4] @ noise
|
||||||
|
str r3, [sp, #8] @ q_noise
|
||||||
|
movw r4, #offset_nsx_normData
|
||||||
|
ldr r2, [r0, #offset_nsx_stages] @ inst->stages
|
||||||
|
ldr r4, [r0, r4] @ inst->normData
|
||||||
|
ldr r12, =WebRtcNsx_kLogTable
|
||||||
|
subs r3, r2, r4 @ tabind = inst->stages - inst->normData;
|
||||||
|
ldr r5, [r0, #offset_nsx_magnLen] @ magnLen
|
||||||
|
rsblt r3, #0
|
||||||
|
lsl r3, #1
|
||||||
|
ldrh r3, [r12, r3] @ logval = WebRtcNsx_kLogTable[tabind];
|
||||||
|
add r12, sp, #16 @ lmagn[]
|
||||||
|
rsblt r3, #0 @ logval = -WebRtcNsx_kLogTable[-tabind];
|
||||||
|
str r3, [sp]
|
||||||
|
vdup.16 q15, r3
|
||||||
|
|
||||||
|
ldr r9, =WebRtcNsx_kLogTableFrac
|
||||||
|
|
||||||
|
LOOP_SET_LMAGN:
|
||||||
|
ldrh r2, [r1], #2 @ magn[i]
|
||||||
|
cmp r2, #0
|
||||||
|
streqh r3, [r12], #2 @ lmagn[i] = logval;
|
||||||
|
beq CHECK_LMAGN_COUNTER
|
||||||
|
|
||||||
|
clz r6, r2
|
||||||
|
mov r4, r6 @ zeros
|
||||||
|
rsb r6, #31
|
||||||
|
lsl r2, r4
|
||||||
|
ubfx r4, r2, #23, #8
|
||||||
|
mov r2, r4, lsl #1
|
||||||
|
ldrh r4, [r9, r2] @ WebRtcNsx_kLogTableFrac[frac]
|
||||||
|
add r7, r4, r6, lsl #8 @ log2
|
||||||
|
movw r2, #22713 @ log2_const
|
||||||
|
smulbb r2, r7, r2
|
||||||
|
add r2, r3, r2, lsr #15
|
||||||
|
strh r2, [r12], #2 @ lmagn[i]
|
||||||
|
|
||||||
|
CHECK_LMAGN_COUNTER:
|
||||||
|
subs r5, #1
|
||||||
|
bgt LOOP_SET_LMAGN
|
||||||
|
|
||||||
|
movw r3, #21845 @ width_factor
|
||||||
|
vdup.16 q5, r3
|
||||||
|
vmov.s16 q14, #WIDTH_Q8
|
||||||
|
|
||||||
|
movw r5, #offset_nsx_noiseEstLogQuantile
|
||||||
|
movw r7, #offset_nsx_blockIndex
|
||||||
|
movw r9, #offset_nsx_noiseEstDensity
|
||||||
|
add r5, r0
|
||||||
|
ldr r6, [r0, #offset_nsx_magnLen]
|
||||||
|
ldr r7, [r0, r7]
|
||||||
|
add r9, r0
|
||||||
|
cmp r7, #END_STARTUP_LONG
|
||||||
|
add r10, r0, #offset_nsx_noiseEstCounter
|
||||||
|
movge r7, #FACTOR_Q7
|
||||||
|
movlt r7, #FACTOR_Q7_STARTUP
|
||||||
|
mov r4, r0
|
||||||
|
str r7, [sp, #12] @ factor
|
||||||
|
mov r8, #SIMULT
|
||||||
|
mov r7, #0
|
||||||
|
|
||||||
|
LOOP_SIMULT:
|
||||||
|
ldrsh r1, [r10] @ inst->noiseEstCounter[s]
|
||||||
|
ldr r3, =WebRtcNsx_kCounterDiv
|
||||||
|
mov r11, r1, lsl #1 @ counter
|
||||||
|
ldrh r11, [r3, r11] @ countDiv = WebRtcNsx_kCounterDiv[counter];
|
||||||
|
sub r12, r6, #1 @ Loop counter.
|
||||||
|
smulbb r3, r1, r11 @ countProd
|
||||||
|
vdup.16 q11, r11
|
||||||
|
|
||||||
|
vqrdmulh.s16 q11, q5, q11 @ WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
|
||||||
|
@ width_factor, countDiv, 15);
|
||||||
|
vdup.16 d24, r11
|
||||||
|
vdup.16 d25, r3
|
||||||
|
|
||||||
|
ldr r3, [sp, #12] @ factor
|
||||||
|
add r1, sp, #16 @ &lmagn[0]
|
||||||
|
vdup.16 q9, r3
|
||||||
|
vmov.i16 q13, #512
|
||||||
|
vmov.i16 q7, #15
|
||||||
|
vmov.i32 q6, #FACTOR_Q16
|
||||||
|
|
||||||
|
LOOP_NOISEESTIMATION_MAGNLEN_INNER:
|
||||||
|
vld1.16 {q0}, [r9] @ noiseEstDensity[offset + i]
|
||||||
|
|
||||||
|
@ Compute delta in the next two blocks.
|
||||||
|
vclz.i16 q4, q0
|
||||||
|
vsub.i16 q4, q4, q7 @ Value of the shift factors; likely negative.
|
||||||
|
vmovl.s16 q3, d8
|
||||||
|
vmovl.s16 q2, d9
|
||||||
|
|
||||||
|
vshl.s32 q1, q6, q3
|
||||||
|
vmovn.i32 d8, q1 @ d8 holds shifted FACTOR_Q16.
|
||||||
|
vshl.s32 q1, q6, q2
|
||||||
|
vcgt.s16 q3, q0, q13 @ Compare noiseEstDensity to 512.
|
||||||
|
vmovn.i32 d9, q1 @ d9 holds shifted FACTOR_Q16.
|
||||||
|
vmov.i16 q1, q9
|
||||||
|
vbit.s16 q1, q4, q3 @ If bigger than 512, delta = shifted FACTOR_Q16.
|
||||||
|
|
||||||
|
vmull.s16 q8, d3, d24
|
||||||
|
vmull.s16 q4, d2, d24
|
||||||
|
vshrn.i32 d2, q4, #14
|
||||||
|
vshrn.i32 d3, q8, #14
|
||||||
|
|
||||||
|
vrshr.s16 q3, q1, #1
|
||||||
|
vrshr.s16 q8, q1, #2
|
||||||
|
vmull.s16 q4, d7, d28
|
||||||
|
vmull.s16 q3, d6, d28
|
||||||
|
vld1.16 {q10}, [r5] @ inst->noiseEstLogQuantile[offset + i]
|
||||||
|
vshrn.i32 d4, q3, #1
|
||||||
|
vshrn.i32 d5, q4, #1
|
||||||
|
|
||||||
|
vld1.16 {q3}, [r1]! @ lmagn[i]
|
||||||
|
vsub.i16 q4, q10, q2
|
||||||
|
vadd.i16 q8, q10, q8
|
||||||
|
vsub.i16 q2, q3, q10
|
||||||
|
vmax.s16 q4, q4, q15
|
||||||
|
vcgt.s16 q1, q2, #0
|
||||||
|
vbit q10, q8, q1
|
||||||
|
vbif q10, q4, q1
|
||||||
|
|
||||||
|
vsub.i16 q1, q3, q10
|
||||||
|
vst1.16 {q10}, [r5]! @ inst->noiseEstLogQuantile[offset + i]
|
||||||
|
vabs.s16 q4, q1
|
||||||
|
vqrdmulh.s16 d2, d0, d25
|
||||||
|
vqrdmulh.s16 d3, d1, d25
|
||||||
|
vcgt.s16 q4, q14, q4
|
||||||
|
vadd.i16 q1, q1, q11
|
||||||
|
vbit q0, q1, q4
|
||||||
|
subs r12, #8
|
||||||
|
vst1.16 {q0}, [r9]! @ noiseEstDensity[offset + i]
|
||||||
|
bgt LOOP_NOISEESTIMATION_MAGNLEN_INNER
|
||||||
|
|
||||||
|
@
|
||||||
|
@ Last iteration over magnitude spectrum.
|
||||||
|
@
|
||||||
|
|
||||||
|
COMPUTE_DELTA:
|
||||||
|
ldrsh r2, [r9] @ inst->noiseEstDensity[offset + i]
|
||||||
|
cmp r2, #512
|
||||||
|
bgt COMPUTE_DELTA_BIGGER_DENSITY
|
||||||
|
|
||||||
|
movw r2, #offset_nsx_blockIndex
|
||||||
|
ldr r0, [r4, r2]
|
||||||
|
cmp r0, #END_STARTUP_LONG
|
||||||
|
movge r0, #FACTOR_Q7 @ delta
|
||||||
|
movlt r0, #FACTOR_Q7_STARTUP @ delta
|
||||||
|
b UPDATE_LOG_QUANTILE_ESTIMATE
|
||||||
|
|
||||||
|
COMPUTE_DELTA_BIGGER_DENSITY:
|
||||||
|
clz r2, r2
|
||||||
|
rsb r0, r2, #31 @ 14 - factor
|
||||||
|
mov r2, #FACTOR_Q16
|
||||||
|
mov r0, r2, lsr r0 @ FACTOR_Q16 >> (14 - factor)
|
||||||
|
|
||||||
|
UPDATE_LOG_QUANTILE_ESTIMATE:
|
||||||
|
smulbb r12, r0, r11
|
||||||
|
ldrsh r1, [r1] @ lmagn[i]
|
||||||
|
ubfx r12, r12, #14, #16 @ tmp16
|
||||||
|
ldrsh r2, [r5] @ inst->noiseEstLogQuantile[offset + i]
|
||||||
|
cmp r1, r2
|
||||||
|
bgt UPDATE_LOG_QUANTILE_ESTIMATE_BIGGER_LMAGN
|
||||||
|
|
||||||
|
add r12, #1
|
||||||
|
ldr r3, [sp] @ logval
|
||||||
|
mov r0, r12, lsr #1 @ tmp16no1
|
||||||
|
mov r12, #3
|
||||||
|
smulbb r12, r0, r12 @ tmp16no2
|
||||||
|
sub r2, r12, lsr #1
|
||||||
|
cmp r3, r2
|
||||||
|
ldrgt r2, [sp]
|
||||||
|
ldrgt r3, [sp]
|
||||||
|
b UPDATE_LOG_QUANTILE_ESTIMATE_STORE
|
||||||
|
|
||||||
|
UPDATE_LOG_QUANTILE_ESTIMATE_BIGGER_LMAGN:
|
||||||
|
add r3, r12, #2
|
||||||
|
add r2, r3, lsr #2
|
||||||
|
|
||||||
|
UPDATE_LOG_QUANTILE_ESTIMATE_STORE:
|
||||||
|
vmov.s16 r0, d25[0] @ countProd
|
||||||
|
strh r2, [r5]
|
||||||
|
add r5, #2 @ increment &noiseEstLogQuantile[offset + i]
|
||||||
|
|
||||||
|
UPDATE_DENSITY_ESTIMATE:
|
||||||
|
subs r12, r1, r2
|
||||||
|
rsblt r12, #0
|
||||||
|
cmp r12, #WIDTH_Q8
|
||||||
|
bge UPDATE_DENSITY_ESTIMATE_CHECK_COUNTER
|
||||||
|
|
||||||
|
movw r3, #21845 @ width_factor
|
||||||
|
ldrh r12, [r9] @ inst->noiseEstDensity[offset + i]
|
||||||
|
smulbb r2, r3, r11
|
||||||
|
smulbb r1, r12, r0
|
||||||
|
add r0, r2, #1 << 14 @ Rounding
|
||||||
|
add r12, r1, #1 << 14
|
||||||
|
mov r1, r12, lsr #15
|
||||||
|
add r3, r1, r0, lsr #15
|
||||||
|
strh r3, [r9] @ inst->noiseEstDensity[offset + i]
|
||||||
|
|
||||||
|
UPDATE_DENSITY_ESTIMATE_CHECK_COUNTER:
|
||||||
|
add r9, #2 @ update &noiseEstDensity[offset + i]
|
||||||
|
ldrsh r3, [r10] @ inst->noiseEstCounter[s]
|
||||||
|
cmp r3, #END_STARTUP_LONG
|
||||||
|
@ Counter still below END_STARTUP_LONG: nothing to reset.
blt POST_UPDATE_DENSITY_ESTIMATE
|
||||||
|
|
||||||
|
movw r2, #offset_nsx_blockIndex
|
||||||
|
mov r12, #0
|
||||||
|
ldr r2, [r4, r2] @ inst->blockIndex
|
||||||
|
strh r12, [r10] @ inst->noiseEstCounter[s] = 0;
|
||||||
|
cmp r2, #END_STARTUP_LONG
|
||||||
|
@ Skip the noise estimate update while blockIndex < END_STARTUP_LONG.
blt POST_UPDATE_DENSITY_ESTIMATE
|
||||||
|
|
||||||
|
@ UpdateNoiseEstimateNeon(inst, offset);
mov r0, r4
|
||||||
|
mov r1, r7
|
||||||
|
bl UpdateNoiseEstimateNeon
|
||||||
|
|
||||||
|
POST_UPDATE_DENSITY_ESTIMATE:
|
||||||
|
ldrh r3, [r10]
|
||||||
|
add r3, #1
|
||||||
|
strh r3, [r10], #2
|
||||||
|
subs r8, #1
|
||||||
|
add r7, r6 @ offset += inst->magnLen;
|
||||||
|
bgt LOOP_SIMULT
|
||||||
|
|
||||||
|
movw r2, #offset_nsx_blockIndex
|
||||||
|
ldr r2, [r4, r2]
|
||||||
|
cmp r2, #END_STARTUP_LONG
|
||||||
|
bge UPDATE_NOISE
|
||||||
|
|
||||||
|
sub r1, r7, r6
|
||||||
|
mov r0, r4
|
||||||
|
bl UpdateNoiseEstimateNeon
|
||||||
|
|
||||||
|
UPDATE_NOISE:
|
||||||
|
movw r1, #offset_nsx_noiseEstQuantile
|
||||||
|
add r1, r4
|
||||||
|
ldr r2, [sp, #4]
|
||||||
|
|
||||||
|
@ Initial value of loop counter r6 = inst->magnLen.
|
||||||
|
LOOP_UPDATE_NOISE:
|
||||||
|
ldrsh r0, [r1], #2
|
||||||
|
subs r6, #1
|
||||||
|
str r0, [r2], #4
|
||||||
|
bgt LOOP_UPDATE_NOISE
|
||||||
|
|
||||||
|
UPDATE_Q_NOISE:
|
||||||
|
movw r2, #offset_nsx_qNoise
|
||||||
|
ldr r1, [sp, #8]
|
||||||
|
ldrh r2, [r4, r2]
|
||||||
|
strh r2, [r1]
|
||||||
|
|
||||||
|
add sp, #(16 + (HALF_ANAL_BLOCKL + 3) / 4 * 8)
|
||||||
|
vpop {d8-d15}
|
||||||
|
pop {r4-r11, pc}
|
||||||
|
.fnend
|
||||||
|
|
||||||
|
@ static void UpdateNoiseEstimateNeon(NsxInst_t* inst, int offset);
@ Inputs: r0 = inst, r1 = offset (index into inst->noiseEstLogQuantile[]).
@ Calls WebRtcSpl_MaxValueW16(&inst->noiseEstLogQuantile[offset],
@ inst->magnLen), derives inst->qNoise from the returned value and stores it,
@ then converts inst->magnLen entries of noiseEstLogQuantile[offset + i] into
@ inst->noiseEstQuantile[i]: eight per iteration in LOOP_UPDATE, and the
@ final entry with scalar code in POST_LOOP_MAGNLEN.
|
||||||
|
@ Neon registers touched: q0-q3, q8-q13.
|
||||||
|
UpdateNoiseEstimateNeon:
|
||||||
|
.fnstart
|
||||||
|
.save {r4, r5, r6, r14}
|
||||||
|
|
||||||
|
push {r4, r5, r6, r14}
|
||||||
|
mov r5, r0 @ Keep inst in r5 across the call below.
|
||||||
|
|
||||||
|
vmov.i32 q10, #21
|
||||||
|
vmov.i32 q11, #0x1FFFFF
|
||||||
|
vmov.i32 q9, #0x200000
|
||||||
|
|
||||||
|
movw r0, #offset_nsx_noiseEstLogQuantile
|
||||||
|
movw r6, #offset_nsx_magnLen
|
||||||
|
add r0, r5 @ &inst->noiseEstLogQuantile
|
||||||
|
add r4, r0, r1, lsl #1 @ &inst->noiseEstLogQuantile[offset]
|
||||||
|
ldrsh r6, [r5, r6] @ inst->magnLen (low halfword, sign-extended)
|
||||||
|
|
||||||
|
mov r0, r4
|
||||||
|
|
||||||
|
mov r1, r6
|
||||||
|
bl WebRtcSpl_MaxValueW16
|
||||||
|
|
||||||
|
sub r12, r6, #1 @ Loop counter: inst->magnLen - 1.
|
||||||
|
|
||||||
|
movw r6, #11819 @ kExp2Const in Q13
|
||||||
|
movw r2, #offset_nsx_noiseEstQuantile
|
||||||
|
vdup.16 d16, r6
|
||||||
|
smulbb r3, r6, r0 @ kExp2Const * tmp16 (r0: value returned above)
|
||||||
|
add r0, r3, #1 << 20 @ Round
|
||||||
|
movw r1, #offset_nsx_qNoise
|
||||||
|
mov r0, r0, lsr #21
|
||||||
|
rsb r0, r0, #14 @ 14 - (round(kExp2Const * tmp16) >> 21)
|
||||||
|
add r2, r5 @ &inst->noiseEstQuantile
|
||||||
|
vdup.32 q13, r0
|
||||||
|
str r0, [r5, r1] @ inst->qNoise
|
||||||
|
|
||||||
|
|
||||||
|
@ Each iteration converts eight entries; r12 counts down from magnLen - 1.
LOOP_UPDATE:
|
||||||
|
vld1.16 {d0, d1}, [r4]! @ &inst->noiseEstLogQuantile[offset + i]
|
||||||
|
vmull.s16 q1, d0, d16
|
||||||
|
vmull.s16 q0, d1, d16
|
||||||
|
vshr.s32 q3, q1, #21
|
||||||
|
vshr.s32 q2, q0, #21
|
||||||
|
vand q1, q1, q11
|
||||||
|
vand q0, q0, q11
|
||||||
|
vsub.i32 q3, q3, q10
|
||||||
|
vsub.i32 q2, q2, q10
|
||||||
|
vorr q1, q1, q9
|
||||||
|
vorr q0, q0, q9
|
||||||
|
vadd.i32 q3, q3, q13
|
||||||
|
vadd.i32 q2, q2, q13
|
||||||
|
vshl.s32 q1, q1, q3
|
||||||
|
vshl.s32 q0, q0, q2
|
||||||
|
vqmovn.s32 d1, q0
|
||||||
|
vqmovn.s32 d0, q1
|
||||||
|
subs r12, #8
|
||||||
|
vst1.16 {d0, d1}, [r2]!
|
||||||
|
bgt LOOP_UPDATE
|
||||||
|
|
||||||
|
@ Scalar handling of the one remaining entry (r0 still holds inst->qNoise).
POST_LOOP_MAGNLEN:
|
||||||
|
ldrh r1, [r4]
|
||||||
|
smulbb r3, r6, r1 @ kExp2Const * ptr_noiseEstLogQuantile[offset + i]
|
||||||
|
mov r12, #0x00200000
|
||||||
|
bfi r12, r3, #0, #21 @ tmp32no1 = 0x00200000 | (tmp32no2 & 0x001FFFFF);
|
||||||
|
rsb r0, #21 @ 21 - inst->qNoise
|
||||||
|
sub r14, r0, r3, lsr #21 @ -tmp16
|
||||||
|
mov r0, r12, lsr r14
|
||||||
|
ssat r3, #16, r0
|
||||||
|
strh r3, [r2]
|
||||||
|
|
||||||
|
pop {r4, r5, r6, pc}
|
||||||
|
.fnend
|
||||||
|
|
||||||
|
@ void PrepareSpectrumNeon(NsxInst_t* inst, int16_t* freq_buf);
@
@ Inputs: r0 = inst, r1 = freq_buf.
@ (1) Filters inst->real[] and inst->imag[] in place: each element is
@     multiplied by the matching inst->noiseSupFilter[] element and shifted
@     right by 14.
@ (2) Writes the filtered spectrum to freq_buf[] as interleaved
@     (real, -imag) pairs going forward, and as interleaved (real, imag)
@     pairs going backward from the end of the buffer.
@
@ Register usage:
@ r2/r4:  running pointers into inst->real[] / inst->imag[] (filter loop)
@ r3/r6:  running pointers into inst->real[] / inst->imag[] (spectrum loop)
@ r9:     &inst->imag[0]
@ r12:    running pointer into inst->noiseSupFilter[], later the -32 stride
@
@ r9 is written below, so it must be saved and restored together with r4-r8;
@ it is a callee-saved register under the ARM procedure call standard.

WebRtcNsx_PrepareSpectrumNeon:
  .fnstart
  .save {r4-r9}

  push {r4-r9}

  movw r2, #offset_nsx_real
  movw r12, #offset_nsx_noiseSupFilter
  movw r4, #offset_nsx_imag
  movw r5, #offset_nsx_magnLen

  add r2, r0                      @ &inst->real[0]
  add r4, r0                      @ &inst->imag[0]
  mov r9, r4                      @ &inst->imag[0]
  mov r3, r2                      @ &inst->real[0]
  ldr r5, [r0, r5]                @ inst->magnLen
  add r6, r4, #2                  @ &inst->imag[1]
  sub r5, #1
  add r12, r0                     @ &inst->noiseSupFilter[0]
  add r5, r2, r5, lsl #1          @ &inst->real[inst->magnLen - 1]

LOOP_MAGNLEN:
  @ Filter the elements, eight at a time.
  vld1.16 {d20, d21}, [r2]        @ inst->real[]
  vld1.16 {d24, d25}, [r12]!      @ inst->noiseSupFilter[]
  vld1.16 {d22, d23}, [r4]        @ inst->imag[]
  vmull.s16 q0, d20, d24
  vmull.s16 q1, d21, d25
  vmull.s16 q2, d22, d24
  vmull.s16 q3, d23, d25
  vshrn.s32 d0, q0, #14
  vshrn.s32 d1, q1, #14
  vshrn.s32 d2, q2, #14
  vshrn.s32 d3, q3, #14
  vst1.16 {d0, d1}, [r2]!
  vst1.16 {d2, d3}, [r4]!
  cmp r2, r5
  bcc LOOP_MAGNLEN

  @ Last two elements to filter: one of inst->real[] and one of inst->imag[]
  @ (index inst->magnLen - 1, assuming inst->magnLen - 1 is a multiple of 8).
  ldrh r7, [r2]
  ldrh r8, [r12]
  ldrh r5, [r4]
  smulbb r7, r7, r8
  smulbb r5, r5, r8
  mov r7, r7, lsr #14
  mov r8, r5, lsr #14
  strh r7, [r2]
  strh r8, [r4]

  ldr r5, [r0, #offset_nsx_anaLen2]   @ inst->anaLen2
  ldr r7, [r0, #offset_nsx_anaLen]    @ inst->anaLen
  add r5, r3, r5, lsl #1              @ &inst->real[inst->anaLen2]

  ldrh r2, [r3], #2               @ inst->real[0]
  ldrh r0, [r9]                   @ inst->imag[0]
  strh r2, [r1], #2               @ Store to freq_buf[0]
  rsb r0, r0, #0
  strh r0, [r1], #2               @ Store to freq_buf[1]. Now r1 -> &freq_buf[2]

  add r2, r1, r7, lsl #2
  sub r2, #36                     @ &freq_buf[2 * inst->anaLen - 16]

  mvn r12, #0x1F                  @ -32

@ At the last iteration, &freq_buf[inst->anaLen + 1] will be written to by both
@ the vst1 instructions. Only the 2nd vst1 instruction has the correct value
@ (-inst->imag[inst->anaLen2]), so the order of the two vst1 instructions is
@ important.
LOOP_ANALEN2:
  vld1.16 {d0, d1}, [r3]!         @ inst->real[], starting from inst->real[1]
  vld1.16 {d2, d3}, [r6]!         @ inst->imag[], starting from inst->imag[1]
  vmov.s16 d4, d0
  vmov.s16 d6, d1
  vneg.s16 d5, d2
  vneg.s16 d7, d3
  vzip.16 d0, d2
  vzip.16 d1, d3
  vzip.16 d4, d5
  vzip.16 d6, d7
  vrev64.32 d16, d3
  vrev64.32 d17, d1
  vrev64.32 d18, d2
  vrev64.32 d19, d0
  cmp r3, r5
  vst1.16 {d16, d17, d18, d19}, [r2], r12
  vst1.16 {d4, d5, d6, d7}, [r1]!
  bls LOOP_ANALEN2

  pop {r4-r9}
  bx r14
  .fnend
|
||||||
|
|
||||||
|
@ void WebRtcNsx_DenormalizeNeon(NsxInst_t* inst, int16_t* in, int factor);
|
||||||
|
WebRtcNsx_DenormalizeNeon:
|
||||||
|
.fnstart
|
||||||
|
movw r12, #offset_nsx_normData
|
||||||
|
movw r3, #offset_nsx_real
|
||||||
|
ldr r12, [r0, r12] @ inst->normData
|
||||||
|
add r3, r0 @ &inst->real[0]
|
||||||
|
sub r2, r12
|
||||||
|
vdup.32 q10, r2
|
||||||
|
|
||||||
|
movw r2, #offset_nsx_anaLen
|
||||||
|
ldrsh r2, [r0, r2] @ inst->anaLen
|
||||||
|
add r0, r3, r2, lsl #1 @ &inst->real[inst->anaLen]
|
||||||
|
|
||||||
|
LOOP_ANALEN:
|
||||||
|
vld2.16 {d0, d1}, [r1]! @ &in[]
|
||||||
|
vld2.16 {d2, d3}, [r1]! @ &in[]
|
||||||
|
vmovl.s16 q2, d0
|
||||||
|
vmovl.s16 q3, d2
|
||||||
|
vshl.s32 q2, q10
|
||||||
|
vshl.s32 q3, q10
|
||||||
|
vqmovn.s32 d0, q2
|
||||||
|
vqmovn.s32 d1, q3
|
||||||
|
vst1.16 {d0, d1}, [r3]! @ inst->real[]
|
||||||
|
cmp r3, r0
|
||||||
|
blt LOOP_ANALEN
|
||||||
|
|
||||||
|
bx r14
|
||||||
|
.fnend
|
||||||
|
|
||||||
|
@ void SynthesisUpdateNeon(NsxInst_t* inst,
|
||||||
|
@ int16_t* out_frame,
|
||||||
|
@ int16_t gain_factor);
|
||||||
|
WebRtcNsx_SynthesisUpdateNeon:
|
||||||
|
.fnstart
|
||||||
|
.save {r4, r5}
|
||||||
|
push {r4, r5}
|
||||||
|
|
||||||
|
vdup.16 d31, r2
|
||||||
|
|
||||||
|
movw r2, #offset_nsx_anaLen
|
||||||
|
movw r4, #offset_nsx_real
|
||||||
|
movw r12, #offset_nsx_synthesisBuffer
|
||||||
|
|
||||||
|
ldrsh r5, [r0, r2] @ inst->anaLen
|
||||||
|
add r12, r0 @ &inst->synthesisBuffer[0];
|
||||||
|
ldr r3, [r0, #offset_nsx_window] @ &inst->window[0]
|
||||||
|
add r4, r0 @ &inst->real[0]
|
||||||
|
add r5, r12, r5, lsl #1 @ &inst->synthesisBuffer[inst->anaLen]
|
||||||
|
|
||||||
|
mov r2, r12 @ &inst->synthesisBuffer[0];
|
||||||
|
|
||||||
|
LOOP_SYNTHESIS:
|
||||||
|
vld1.16 {d0, d1}, [r4]! @ inst->real[]
|
||||||
|
vld1.16 {d2, d3}, [r3]! @ inst->window[]
|
||||||
|
vld1.16 {d4, d5}, [r2] @ inst->synthesisBuffer[];
|
||||||
|
vmull.s16 q3, d0, d2
|
||||||
|
vmull.s16 q8, d1, d3
|
||||||
|
vrshrn.i32 d0, q3, #14
|
||||||
|
vrshrn.i32 d1, q8, #14
|
||||||
|
vmull.s16 q3, d31, d0
|
||||||
|
vmull.s16 q8, d31, d1
|
||||||
|
vqrshrn.s32 d0, q3, #13
|
||||||
|
vqrshrn.s32 d1, q8, #13
|
||||||
|
vqadd.s16 d4, d0
|
||||||
|
vqadd.s16 d5, d1
|
||||||
|
vst1.16 {d4, d5}, [r2]!
|
||||||
|
cmp r2, r5
|
||||||
|
blt LOOP_SYNTHESIS
|
||||||
|
|
||||||
|
POST_LOOP_SYNTHESIS:
|
||||||
|
movw r3, #offset_nsx_blockLen10ms
|
||||||
|
ldr r2, [r0, r3]
|
||||||
|
mov r3, r12 @ &inst->synthesisBuffer[0];
|
||||||
|
add r0, r12, r2, lsl #1 @ &inst->synthesisBuffer[inst->blockLen10ms]
|
||||||
|
|
||||||
|
LOOP_BLOCKLEN10MS:
|
||||||
|
vld1.16 {q0, q1}, [r3]! @ inst->synthesisBuffer[];
|
||||||
|
cmp r3, r0
|
||||||
|
vst1.16 {q0, q1}, [r1]! @ out_frame[]
|
||||||
|
blt LOOP_BLOCKLEN10MS
|
||||||
|
|
||||||
|
cmp r0, r5
|
||||||
|
bge POST_LOOP_MEMCPY
|
||||||
|
|
||||||
|
LOOP_MEMCPY:
|
||||||
|
vld1.16 {q0, q1}, [r0]! @ inst->synthesisBuffer[i + inst->blockLen10ms]
|
||||||
|
cmp r0, r5
|
||||||
|
vst1.16 {q0, q1}, [r12]! @ inst->synthesisBuffer[i]
|
||||||
|
blt LOOP_MEMCPY
|
||||||
|
|
||||||
|
POST_LOOP_MEMCPY:
|
||||||
|
cmp r12, r5
|
||||||
|
vmov.i16 q10, #0
|
||||||
|
vmov.i16 q11, #0
|
||||||
|
bge EXIT_SYNTHESISUPDATE
|
||||||
|
|
||||||
|
LOOP_ZEROSARRAY:
|
||||||
|
vst1.16 {q10, q11}, [r12]! @ inst->synthesisBuffer[i + inst->anaLen]
|
||||||
|
cmp r12, r5
|
||||||
|
blt LOOP_ZEROSARRAY
|
||||||
|
|
||||||
|
EXIT_SYNTHESISUPDATE:
|
||||||
|
pop {r4, r5}
|
||||||
|
bx r14
|
||||||
|
|
||||||
|
.fnend
|
||||||
|
|
||||||
|
@ void AnalysisUpdateNeon(NsxInst_t* inst, int16_t* out, int16_t* new_speech);
|
||||||
|
WebRtcNsx_AnalysisUpdateNeon:
|
||||||
|
.fnstart
|
||||||
|
.save {r4-r6}
|
||||||
|
push {r4-r6}
|
||||||
|
|
||||||
|
movw r3, #offset_nsx_analysisBuffer
|
||||||
|
movw r4, #offset_nsx_anaLen
|
||||||
|
movw r12, #offset_nsx_blockLen10ms
|
||||||
|
add r3, r0 @ &inst->analysisBuffer[0]
|
||||||
|
ldrsh r4, [r0, r4] @ inst->anaLen
|
||||||
|
ldr r12, [r0, r12] @ inst->blockLen10ms
|
||||||
|
sub r6, r4, r12
|
||||||
|
add r6, r3, r6, lsl #1 @ &inst->analysisBuffer[inst->anaLen
|
||||||
|
@ - inst->blockLen10ms]
|
||||||
|
cmp r3, r6
|
||||||
|
mov r5, r3
|
||||||
|
bge POST_LOOP_MEMCPY_1
|
||||||
|
|
||||||
|
add r12, r3, r12, lsl #1 @ &inst->analysisBuffer[inst->blockLen10ms]
|
||||||
|
|
||||||
|
LOOP_MEMCPY_1:
|
||||||
|
vld1.16 {q10, q11}, [r12]! @ inst->analysisBuffer[i + inst->blockLen10ms]
|
||||||
|
vst1.16 {q10, q11}, [r5]! @ inst->analysisBuffer[i]
|
||||||
|
cmp r5, r6
|
||||||
|
blt LOOP_MEMCPY_1
|
||||||
|
|
||||||
|
POST_LOOP_MEMCPY_1:
|
||||||
|
add r12, r3, r4, lsl #1 @ &inst->analysisBuffer[inst->anaLen]
|
||||||
|
cmp r5, r12
|
||||||
|
bge POST_LOOP_MEMCPY_2
|
||||||
|
|
||||||
|
LOOP_MEMCPY_2:
|
||||||
|
vld1.16 {q10, q11}, [r2]! @ new_speech[i]
|
||||||
|
vst1.16 {q10, q11}, [r5]! @ inst->analysisBuffer[
|
||||||
|
@ i + inst->anaLen - inst->blockLen10ms]
|
||||||
|
cmp r5, r12
|
||||||
|
blt LOOP_MEMCPY_2
|
||||||
|
|
||||||
|
POST_LOOP_MEMCPY_2:
|
||||||
|
add r4, r1, r4, lsl #1 @ &out[inst->anaLen]
|
||||||
|
cmp r1, r4
|
||||||
|
ldr r2, [r0, #offset_nsx_window] @ &inst->window[0]
|
||||||
|
bge POST_LOOP_WINDOW_DATA
|
||||||
|
|
||||||
|
LOOP_WINDOW_DATA:
|
||||||
|
vld1.16 {d4, d5}, [r3]! @ inst->analysisBuffer[]
|
||||||
|
vld1.16 {d6, d7}, [r2]! @ inst->window[]
|
||||||
|
vmull.s16 q0, d4, d6
|
||||||
|
vmull.s16 q1, d5, d7
|
||||||
|
vrshrn.i32 d4, q0, #14
|
||||||
|
vrshrn.i32 d5, q1, #14
|
||||||
|
vst1.16 {d4, d5}, [r1]! @ out[]
|
||||||
|
cmp r1, r4
|
||||||
|
blt LOOP_WINDOW_DATA
|
||||||
|
|
||||||
|
POST_LOOP_WINDOW_DATA:
|
||||||
|
pop {r4-r6}
|
||||||
|
bx r14
|
||||||
|
.fnend
|
||||||
|
|
||||||
|
@ void CreateComplexBufferNeon(NsxInst_t* inst, int16_t* in, int16_t* out);
|
||||||
|
WebRtcNsx_CreateComplexBufferNeon:
|
||||||
|
.fnstart
|
||||||
|
movw r3, #offset_nsx_anaLen
|
||||||
|
movw r12, #offset_nsx_normData
|
||||||
|
ldrsh r3, [r0, r3] @ inst->anaLen
|
||||||
|
ldr r12, [r0, r12] @ inst->normData
|
||||||
|
add r3, r1, r3, lsl #1 @ &in[inst->anaLen]
|
||||||
|
|
||||||
|
vmov.i16 d7, #0 @ For writing to imaginary parts.
|
||||||
|
vmov.i16 d5, #0 @ For writing to imaginary parts.
|
||||||
|
vdup.i16 q10, r12
|
||||||
|
|
||||||
|
LOOP_CREATE_COMPLEX_BUFFER: @ Unrolled by 16.
|
||||||
|
vld1.16 {d0, d1, d2, d3}, [r1]! @ in[]
|
||||||
|
cmp r1, r3
|
||||||
|
vshl.s16 q0, q10
|
||||||
|
vshl.s16 q1, q10
|
||||||
|
vmov d4, d1
|
||||||
|
vmov d1, d5
|
||||||
|
vmov d6, d3
|
||||||
|
vmov d3, d7
|
||||||
|
vst2.16 {d0, d1}, [r2]!
|
||||||
|
vst2.16 {d4, d5}, [r2]!
|
||||||
|
vst2.16 {d2, d3}, [r2]!
|
||||||
|
vst2.16 {d6, d7}, [r2]!
|
||||||
|
blt LOOP_CREATE_COMPLEX_BUFFER
|
||||||
|
|
||||||
|
bx r14
|
||||||
|
.fnend
|
@ -91,10 +91,10 @@ static void UpdateNoiseEstimateNeon(NsxInst_t* inst, int offset) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Noise Estimation
|
// Noise Estimation
|
||||||
static void NoiseEstimationNeon(NsxInst_t* inst,
|
void WebRtcNsx_NoiseEstimationNeon(NsxInst_t* inst,
|
||||||
uint16_t* magn,
|
uint16_t* magn,
|
||||||
uint32_t* noise,
|
uint32_t* noise,
|
||||||
int16_t* q_noise) {
|
int16_t* q_noise) {
|
||||||
int16_t lmagn[HALF_ANAL_BLOCKL], counter, countDiv;
|
int16_t lmagn[HALF_ANAL_BLOCKL], counter, countDiv;
|
||||||
int16_t countProd, delta, zeros, frac;
|
int16_t countProd, delta, zeros, frac;
|
||||||
int16_t log2, tabind, logval, tmp16, tmp16no1, tmp16no2;
|
int16_t log2, tabind, logval, tmp16, tmp16no1, tmp16no2;
|
||||||
@ -320,7 +320,7 @@ static void NoiseEstimationNeon(NsxInst_t* inst,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Filter the data in the frequency domain, and create spectrum.
|
// Filter the data in the frequency domain, and create spectrum.
|
||||||
static void PrepareSpectrumNeon(NsxInst_t* inst, int16_t* freq_buf) {
|
void WebRtcNsx_PrepareSpectrumNeon(NsxInst_t* inst, int16_t* freq_buf) {
|
||||||
|
|
||||||
// (1) Filtering.
|
// (1) Filtering.
|
||||||
|
|
||||||
@ -455,7 +455,7 @@ static void PrepareSpectrumNeon(NsxInst_t* inst, int16_t* freq_buf) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Denormalize the input buffer.
|
// Denormalize the input buffer.
|
||||||
static __inline void DenormalizeNeon(NsxInst_t* inst, int16_t* in, int factor) {
|
void WebRtcNsx_DenormalizeNeon(NsxInst_t* inst, int16_t* in, int factor) {
|
||||||
int16_t* ptr_real = &inst->real[0];
|
int16_t* ptr_real = &inst->real[0];
|
||||||
int16_t* ptr_in = &in[0];
|
int16_t* ptr_in = &in[0];
|
||||||
|
|
||||||
@ -494,9 +494,9 @@ static __inline void DenormalizeNeon(NsxInst_t* inst, int16_t* in, int factor) {
|
|||||||
|
|
||||||
// For the noise suppression process, synthesis, read out fully processed segment,
|
// For the noise suppression process, synthesis, read out fully processed segment,
|
||||||
// and update synthesis buffer.
|
// and update synthesis buffer.
|
||||||
static void SynthesisUpdateNeon(NsxInst_t* inst,
|
void WebRtcNsx_SynthesisUpdateNeon(NsxInst_t* inst,
|
||||||
int16_t* out_frame,
|
int16_t* out_frame,
|
||||||
int16_t gain_factor) {
|
int16_t gain_factor) {
|
||||||
int16_t* ptr_real = &inst->real[0];
|
int16_t* ptr_real = &inst->real[0];
|
||||||
int16_t* ptr_syn = &inst->synthesisBuffer[0];
|
int16_t* ptr_syn = &inst->synthesisBuffer[0];
|
||||||
const int16_t* ptr_window = &inst->window[0];
|
const int16_t* ptr_window = &inst->window[0];
|
||||||
@ -605,9 +605,9 @@ static void SynthesisUpdateNeon(NsxInst_t* inst,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Update analysis buffer for lower band, and window data before FFT.
|
// Update analysis buffer for lower band, and window data before FFT.
|
||||||
static void AnalysisUpdateNeon(NsxInst_t* inst,
|
void WebRtcNsx_AnalysisUpdateNeon(NsxInst_t* inst,
|
||||||
int16_t* out,
|
int16_t* out,
|
||||||
int16_t* new_speech) {
|
int16_t* new_speech) {
|
||||||
|
|
||||||
int16_t* ptr_ana = &inst->analysisBuffer[inst->blockLen10ms];
|
int16_t* ptr_ana = &inst->analysisBuffer[inst->blockLen10ms];
|
||||||
int16_t* ptr_out = &inst->analysisBuffer[0];
|
int16_t* ptr_out = &inst->analysisBuffer[0];
|
||||||
@ -682,9 +682,9 @@ static void AnalysisUpdateNeon(NsxInst_t* inst,
|
|||||||
|
|
||||||
// Create a complex number buffer (out[]) as the input (in[]) interleaved with
|
// Create a complex number buffer (out[]) as the input (in[]) interleaved with
|
||||||
// zeros, and normalize it.
|
// zeros, and normalize it.
|
||||||
static __inline void CreateComplexBufferNeon(NsxInst_t* inst,
|
void WebRtcNsx_CreateComplexBufferNeon(NsxInst_t* inst,
|
||||||
int16_t* in,
|
int16_t* in,
|
||||||
int16_t* out) {
|
int16_t* out) {
|
||||||
int16_t* ptr_out = &out[0];
|
int16_t* ptr_out = &out[0];
|
||||||
int16_t* ptr_in = &in[0];
|
int16_t* ptr_in = &in[0];
|
||||||
|
|
||||||
@ -723,12 +723,3 @@ static __inline void CreateComplexBufferNeon(NsxInst_t* inst,
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void WebRtcNsx_InitNeon(void) {
|
|
||||||
WebRtcNsx_NoiseEstimation = NoiseEstimationNeon;
|
|
||||||
WebRtcNsx_PrepareSpectrum = PrepareSpectrumNeon;
|
|
||||||
WebRtcNsx_SynthesisUpdate = SynthesisUpdateNeon;
|
|
||||||
WebRtcNsx_AnalysisUpdate = AnalysisUpdateNeon;
|
|
||||||
WebRtcNsx_Denormalize = DenormalizeNeon;
|
|
||||||
WebRtcNsx_CreateComplexBuffer = CreateComplexBufferNeon;
|
|
||||||
}
|
|
||||||
|
34
src/modules/audio_processing/ns/nsx_core_neon_offsets.c
Normal file
34
src/modules/audio_processing/ns/nsx_core_neon_offsets.c
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Use of this source code is governed by a BSD-style license
|
||||||
|
* that can be found in the LICENSE file in the root of the source
|
||||||
|
* tree. An additional intellectual property rights grant can be found
|
||||||
|
* in the file PATENTS. All contributing project authors may
|
||||||
|
* be found in the AUTHORS file in the root of the source tree.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "nsx_core.h"
|
||||||
|
|
||||||
|
#include <stddef.h>
|
||||||
|
|
||||||
|
// Define offset variables that will be compiled and abstracted to constant
|
||||||
|
// defines, which will then only be used in ARM assembly code.
|
||||||
|
int offset_nsx_anaLen = offsetof(NsxInst_t, anaLen);
|
||||||
|
int offset_nsx_anaLen2 = offsetof(NsxInst_t, anaLen2);
|
||||||
|
int offset_nsx_normData = offsetof(NsxInst_t, normData);
|
||||||
|
int offset_nsx_analysisBuffer = offsetof(NsxInst_t, analysisBuffer);
|
||||||
|
int offset_nsx_synthesisBuffer = offsetof(NsxInst_t, synthesisBuffer);
|
||||||
|
int offset_nsx_blockLen10ms = offsetof(NsxInst_t, blockLen10ms);
|
||||||
|
int offset_nsx_window = offsetof(NsxInst_t, window);
|
||||||
|
int offset_nsx_real = offsetof(NsxInst_t, real);
|
||||||
|
int offset_nsx_imag = offsetof(NsxInst_t, imag);
|
||||||
|
int offset_nsx_noiseSupFilter = offsetof(NsxInst_t, noiseSupFilter);
|
||||||
|
int offset_nsx_magnLen = offsetof(NsxInst_t, magnLen);
|
||||||
|
int offset_nsx_noiseEstLogQuantile = offsetof(NsxInst_t, noiseEstLogQuantile);
|
||||||
|
int offset_nsx_noiseEstQuantile = offsetof(NsxInst_t, noiseEstQuantile);
|
||||||
|
int offset_nsx_qNoise = offsetof(NsxInst_t, qNoise);
|
||||||
|
int offset_nsx_stages = offsetof(NsxInst_t, stages);
|
||||||
|
int offset_nsx_blockIndex = offsetof(NsxInst_t, blockIndex);
|
||||||
|
int offset_nsx_noiseEstCounter = offsetof(NsxInst_t, noiseEstCounter);
|
||||||
|
int offset_nsx_noiseEstDensity = offsetof(NsxInst_t, noiseEstDensity);
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||||
*
|
*
|
||||||
* Use of this source code is governed by a BSD-style license
|
* Use of this source code is governed by a BSD-style license
|
||||||
* that can be found in the LICENSE file in the root of the source
|
* that can be found in the LICENSE file in the root of the source
|
||||||
@ -11,49 +11,53 @@
|
|||||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_DEFINES_H_
|
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_DEFINES_H_
|
||||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_DEFINES_H_
|
#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_DEFINES_H_
|
||||||
|
|
||||||
#define ANAL_BLOCKL_MAX 256 // max analysis block length
|
#define ANAL_BLOCKL_MAX 256 /* Max analysis block length */
|
||||||
#define HALF_ANAL_BLOCKL 129 // half max analysis block length + 1
|
#define HALF_ANAL_BLOCKL 129 /* Half max analysis block length + 1 */
|
||||||
#define SIMULT 3
|
#define SIMULT 3
|
||||||
#define END_STARTUP_LONG 200
|
#define END_STARTUP_LONG 200
|
||||||
#define END_STARTUP_SHORT 50
|
#define END_STARTUP_SHORT 50
|
||||||
#define FACTOR_Q16 (WebRtc_Word32)2621440 // 40 in Q16
|
#define FACTOR_Q16 2621440 /* 40 in Q16 */
|
||||||
#define FACTOR_Q7 (WebRtc_Word16)5120 // 40 in Q7
|
#define FACTOR_Q7 5120 /* 40 in Q7 */
|
||||||
#define FACTOR_Q7_STARTUP (WebRtc_Word16)1024 // 8 in Q7
|
#define FACTOR_Q7_STARTUP 1024 /* 8 in Q7 */
|
||||||
#define WIDTH_Q8 3 // 0.01 in Q8 (or 25 )
|
#define WIDTH_Q8 3 /* 0.01 in Q8 (or 25 ) */
|
||||||
//PARAMETERS FOR NEW METHOD
|
|
||||||
#define DD_PR_SNR_Q11 2007 // ~= Q11(0.98) DD update of prior SNR
|
/* PARAMETERS FOR NEW METHOD */
|
||||||
#define ONE_MINUS_DD_PR_SNR_Q11 41 // DD update of prior SNR
|
#define DD_PR_SNR_Q11 2007 /* ~= Q11(0.98) DD update of prior SNR */
|
||||||
#define SPECT_FLAT_TAVG_Q14 4915 // (0.30) tavg parameter for spectral flatness measure
|
#define ONE_MINUS_DD_PR_SNR_Q11 41 /* DD update of prior SNR */
|
||||||
#define SPECT_DIFF_TAVG_Q8 77 // (0.30) tavg parameter for spectral flatness measure
|
#define SPECT_FLAT_TAVG_Q14 4915 /* (0.30) tavg parameter for spectral flatness measure */
|
||||||
#define PRIOR_UPDATE_Q14 1638 // Q14(0.1) update parameter of prior model
|
#define SPECT_DIFF_TAVG_Q8 77 /* (0.30) tavg parameter for spectral difference measure */
|
||||||
#define NOISE_UPDATE_Q8 26 // 26 ~= Q8(0.1) update parameter for noise
|
#define PRIOR_UPDATE_Q14 1638 /* Q14(0.1) Update parameter of prior model */
|
||||||
// probability threshold for noise state in speech/noise likelihood
|
#define NOISE_UPDATE_Q8 26 /* 26 ~= Q8(0.1) Update parameter for noise */
|
||||||
#define ONE_MINUS_PROB_RANGE_Q8 205 // 205 ~= Q8(0.8)
|
|
||||||
#define HIST_PAR_EST 1000 // histogram size for estimation of parameters
|
/* Probability threshold for noise state in speech/noise likelihood. */
|
||||||
//FEATURE EXTRACTION CONFIG
|
#define ONE_MINUS_PROB_RANGE_Q8 205 /* 205 ~= Q8(0.8) */
|
||||||
//bin size of histogram
|
#define HIST_PAR_EST 1000 /* Histogram size for estimation of parameters */
|
||||||
|
|
||||||
|
/* FEATURE EXTRACTION CONFIG */
|
||||||
|
/* Bin size of histogram */
|
||||||
#define BIN_SIZE_LRT 10
|
#define BIN_SIZE_LRT 10
|
||||||
//scale parameters: multiply dominant peaks of the histograms by scale factor to obtain
|
/* Scale parameters: multiply dominant peaks of the histograms by scale factor to obtain */
|
||||||
// thresholds for prior model
|
/* Thresholds for prior model */
|
||||||
#define FACTOR_1_LRT_DIFF 6 //for LRT and spectral difference (5 times bigger)
|
#define FACTOR_1_LRT_DIFF 6 /* For LRT and spectral difference (5 times bigger) */
|
||||||
//for spectral_flatness: used when noise is flatter than speech (10 times bigger)
|
/* For spectral_flatness: used when noise is flatter than speech (10 times bigger). */
|
||||||
#define FACTOR_2_FLAT_Q10 922
|
#define FACTOR_2_FLAT_Q10 922
|
||||||
//peak limit for spectral flatness (varies between 0 and 1)
|
/* Peak limit for spectral flatness (varies between 0 and 1) */
|
||||||
#define THRES_PEAK_FLAT 24 // * 2 * BIN_SIZE_FLAT_FX
|
#define THRES_PEAK_FLAT 24 /* * 2 * BIN_SIZE_FLAT_FX */
|
||||||
//limit on spacing of two highest peaks in histogram: spacing determined by bin size
|
/* Limit on spacing of two highest peaks in histogram: spacing determined by bin size. */
|
||||||
#define LIM_PEAK_SPACE_FLAT_DIFF 4 // * 2 * BIN_SIZE_DIFF_FX
|
#define LIM_PEAK_SPACE_FLAT_DIFF 4 /* * 2 * BIN_SIZE_DIFF_FX */
|
||||||
//limit on relevance of second peak:
|
/* Limit on relevance of second peak */
|
||||||
#define LIM_PEAK_WEIGHT_FLAT_DIFF 2
|
#define LIM_PEAK_WEIGHT_FLAT_DIFF 2
|
||||||
#define THRES_FLUCT_LRT 10240 //=20 * inst->modelUpdate; fluctuation limit of LRT feat.
|
#define THRES_FLUCT_LRT 10240 /* = 20 * inst->modelUpdate; fluctuation limit of LRT feat. */
|
||||||
//limit on the max and min values for the feature thresholds
|
/* Limit on the max and min values for the feature thresholds */
|
||||||
#define MAX_FLAT_Q10 38912 // * 2 * BIN_SIZE_FLAT_FX
|
#define MAX_FLAT_Q10 38912 /* * 2 * BIN_SIZE_FLAT_FX */
|
||||||
#define MIN_FLAT_Q10 4096 // * 2 * BIN_SIZE_FLAT_FX
|
#define MIN_FLAT_Q10 4096 /* * 2 * BIN_SIZE_FLAT_FX */
|
||||||
#define MAX_DIFF 100 // * 2 * BIN_SIZE_DIFF_FX
|
#define MAX_DIFF 100 /* * 2 * BIN_SIZE_DIFF_FX */
|
||||||
#define MIN_DIFF 16 // * 2 * BIN_SIZE_DIFF_FX
|
#define MIN_DIFF 16 /* * 2 * BIN_SIZE_DIFF_FX */
|
||||||
//criteria of weight of histogram peak to accept/reject feature
|
/* Criteria of weight of histogram peak to accept/reject feature */
|
||||||
#define THRES_WEIGHT_FLAT_DIFF 154//(int)(0.3*(inst->modelUpdate)) for flatness and difference
|
#define THRES_WEIGHT_FLAT_DIFF 154 /*(int)(0.3*(inst->modelUpdate)) for flatness and difference */
|
||||||
//
|
|
||||||
#define STAT_UPDATES 9 // Update every 512 = 1 << 9 block
|
#define STAT_UPDATES 9 /* Update every 512 = 1 << 9 block */
|
||||||
#define ONE_MINUS_GAMMA_PAUSE_Q8 13 // ~= Q8(0.05) update for conservative noise estimate
|
#define ONE_MINUS_GAMMA_PAUSE_Q8 13 /* ~= Q8(0.05) Update for conservative noise estimate */
|
||||||
#define GAMMA_NOISE_TRANS_AND_SPEECH_Q8 3 // ~= Q8(0.01) update for transition and noise region
|
#define GAMMA_NOISE_TRANS_AND_SPEECH_Q8 3 /* ~= Q8(0.01) Update for transition and noise region */
|
||||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_DEFINES_H_
|
|
||||||
|
#endif /* WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_DEFINES_H_ */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user