Refactored ARM specific code in Noise Suppression. Bit exact.

Review URL: https://webrtc-codereview.appspot.com/459006 git-svn-id: http://webrtc.googlecode.com/svn/trunk@2303 4adac7df-926f-26a2-2b94-8c16560cd09d
2012-05-26 01:05:27 +00:00 · 2012-05-26 01:05:27 +00:00 · 0d321da7e1
commit 0d321da7e1
parent 1755a57cbc
8 changed files with 874 additions and 72 deletions
--- a/src/build/generate_asm_header.py
+++ b/src/build/generate_asm_header.py
@ -0,0 +1,46 @@
+#!/usr/bin/env python
+#
+# Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS.  All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+
+"""This script generates a C header file of offsets from an ARM assembler file.
+
+It parses an ARM assembler generated .S file, finds declarations of variables
+whose names start with the string specified as the third argument in the
+command-line, translates the variable names and values into constant defines and
+writes them into a header file.
+"""
+
+import sys
+
+def usage():
+  """Prints the command-line synopsis and exits with status 1."""
+  synopsis = ("Usage: generate_asm_header.py "
+              "<input filename> <output filename> <variable name pattern>")
+  print(synopsis)
+  sys.exit(1)
+
+def main(argv):
+  """Turns matching labels of an assembler file into C '#define' lines.
+
+  Args:
+    argv: [input filename, output filename, variable name pattern].
+  """
+  if len(argv) != 3:
+    usage()
+
+  # 'with' closes both files even if reading or writing raises.
+  with open(argv[0]) as infile:
+    with open(argv[1], 'w') as outfile:
+      name_pending = False  # True while a written name still awaits its value.
+      for line in infile:  # Iterate through all the lines in the input file.
+        if line.startswith(argv[2]):
+          outfile.write('#define ')
+          outfile.write(line.split(':')[0])  # Write the constant name.
+          outfile.write(' ')
+          name_pending = True
+
+        # Only a '.word' belonging to a matching label is a constant value;
+        # values of unrelated variables must not leak into the header.
+        if name_pending and line.find('.word') >= 0:
+          outfile.write(line.split('.word')[1])  # Write the constant value.
+          name_pending = False
+
+if __name__ == "__main__":
+  main(sys.argv[1:])  # Drop the script name; main() expects exactly 3 args.
--- a/src/modules/audio_processing/ns/Android.mk
+++ b/src/modules/audio_processing/ns/Android.mk
@ -23,7 +23,7 @@ LOCAL_SRC_FILES := \
    nsx_core.c

 # Files for floating point.
-# noise_suppression.c ns_core.c 
+# noise_suppression.c ns_core.c

 # Flags passed to both C and C++ files.
 LOCAL_CFLAGS := $(MY_WEBRTC_COMMON_DEFS)
@ -57,8 +57,20 @@ LOCAL_ARM_MODE := arm
 LOCAL_MODULE_CLASS := STATIC_LIBRARIES
 LOCAL_MODULE := libwebrtc_ns_neon
 LOCAL_MODULE_TAGS := optional
+GEN := $(LOCAL_PATH)/nsx_core_neon_offsets.h

-LOCAL_SRC_FILES := nsx_core_neon.c
+# Generate a header file nsx_core_neon_offsets.h which will be included in
+# assembly file nsx_core_neon.S, from file nsx_core_neon_offsets.c.
+$(GEN): $(LOCAL_PATH)/../../../../src/build/generate_asm_header.py \
+            $(intermediates)/nsx_core_neon_offsets.S
+	@python $^ $@ offset_nsx_
+
+$(intermediates)/nsx_core_neon_offsets.S: $(LOCAL_PATH)/nsx_core_neon_offsets.c
+	@$(TARGET_CC) $(addprefix -I, $(LOCAL_INCLUDES)) $(addprefix -isystem ,\
+            $(TARGET_C_INCLUDES)) -S -o $@ $^
+
+LOCAL_GENERATED_SOURCES := $(GEN)
+LOCAL_SRC_FILES := nsx_core_neon.S

 # Flags passed to both C and C++ files.
 LOCAL_CFLAGS := \
@ -72,6 +84,8 @@ LOCAL_C_INCLUDES := \
    $(LOCAL_PATH)/../../.. \
    $(LOCAL_PATH)/../../../common_audio/signal_processing/include

+LOCAL_INCLUDES := $(LOCAL_C_INCLUDES)
+
 ifndef NDK_ROOT
 include external/stlport/libstlport.mk
 endif
--- a/src/modules/audio_processing/ns/nsx_core.c
+++ b/src/modules/audio_processing/ns/nsx_core.c
@ -435,6 +435,18 @@ AnalysisUpdate WebRtcNsx_AnalysisUpdate;
 Denormalize WebRtcNsx_Denormalize;
 CreateComplexBuffer WebRtcNsx_CreateComplexBuffer;

+#if defined(WEBRTC_DETECT_ARM_NEON) || defined(WEBRTC_ARCH_ARM_NEON)
+// Point every NS function pointer at its ARM Neon implementation.
+static void WebRtcNsx_InitNeon(void) {
+  WebRtcNsx_AnalysisUpdate = WebRtcNsx_AnalysisUpdateNeon;
+  WebRtcNsx_CreateComplexBuffer = WebRtcNsx_CreateComplexBufferNeon;
+  WebRtcNsx_Denormalize = WebRtcNsx_DenormalizeNeon;
+  WebRtcNsx_NoiseEstimation = WebRtcNsx_NoiseEstimationNeon;
+  WebRtcNsx_PrepareSpectrum = WebRtcNsx_PrepareSpectrumNeon;
+  WebRtcNsx_SynthesisUpdate = WebRtcNsx_SynthesisUpdateNeon;
+}
+#endif  // WEBRTC_DETECT_ARM_NEON || WEBRTC_ARCH_ARM_NEON
+
 // Update the noise estimation information.
 static void UpdateNoiseEstimate(NsxInst_t* inst, int offset) {
  WebRtc_Word32 tmp32no1 = 0;
@ -1881,8 +1893,11 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram
  int q_domain_to_use = 0;

  // Code for ARMv7-Neon platform assumes the following:
+  assert(inst->anaLen > 0);
+  assert(inst->anaLen2 > 0);
  assert(inst->anaLen % 16 == 0);
  assert(inst->anaLen2 % 8 == 0);
+  assert(inst->blockLen10ms > 0);
  assert(inst->blockLen10ms % 16 == 0);
  assert(inst->magnLen == inst->anaLen2 + 1);

--- a/src/modules/audio_processing/ns/nsx_core.h
+++ b/src/modules/audio_processing/ns/nsx_core.h
@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
@ -206,10 +206,26 @@ typedef void (*CreateComplexBuffer)(NsxInst_t* inst,
                                    int16_t* out);
 extern CreateComplexBuffer WebRtcNsx_CreateComplexBuffer;

-/****************************************************************************
- * Initialization of the above function pointers for ARM Neon.
- */
-void WebRtcNsx_InitNeon(void);
+#if (defined WEBRTC_DETECT_ARM_NEON) || defined (WEBRTC_ARCH_ARM_NEON)
+// ARM Neon targets for the function pointers above. The functions for generic
+// platforms are declared and defined as static in file nsx_core.c, while those
+// for ARM Neon are declared below and defined in file nsx_core_neon.S.
+void WebRtcNsx_NoiseEstimationNeon(NsxInst_t* inst,
+                                   uint16_t* magn,
+                                   uint32_t* noise,
+                                   int16_t* q_noise);
+void WebRtcNsx_CreateComplexBufferNeon(NsxInst_t* inst,
+                                       int16_t* in,
+                                       int16_t* out);
+void WebRtcNsx_SynthesisUpdateNeon(NsxInst_t* inst,
+                                   int16_t* out_frame,
+                                   int16_t gain_factor);
+void WebRtcNsx_AnalysisUpdateNeon(NsxInst_t* inst,
+                                  int16_t* out,
+                                  int16_t* new_speech);
+void WebRtcNsx_DenormalizeNeon(NsxInst_t* inst, int16_t* in, int factor);
+void WebRtcNsx_PrepareSpectrumNeon(NsxInst_t* inst, int16_t* freq_buff);
+#endif  // WEBRTC_DETECT_ARM_NEON || WEBRTC_ARCH_ARM_NEON

 extern const WebRtc_Word16 WebRtcNsx_kLogTable[9];
 extern const WebRtc_Word16 WebRtcNsx_kLogTableFrac[256];
--- a/src/modules/audio_processing/ns/nsx_core_neon.S
+++ b/src/modules/audio_processing/ns/nsx_core_neon.S
@ -0,0 +1,682 @@
+@
+@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+@
+@ Use of this source code is governed by a BSD-style license
+@ that can be found in the LICENSE file in the root of the source
+@ tree. An additional intellectual property rights grant can be found
+@ in the file PATENTS.  All contributing project authors may
+@ be found in the AUTHORS file in the root of the source tree.
+@
+
+@ nsx_core_neon.s
+@ This file contains some functions in NS, optimized for ARM Neon
+@ platforms. Reference C code is in file nsx_core.c. Bit-exact.
+
+.arch armv7-a
+.fpu neon
+
+#include "nsx_defines.h"
+#include "nsx_core_neon_offsets.h"
+
+.global WebRtcNsx_NoiseEstimationNeon
+.global WebRtcNsx_PrepareSpectrumNeon
+.global WebRtcNsx_SynthesisUpdateNeon
+.global WebRtcNsx_AnalysisUpdateNeon
+.global WebRtcNsx_DenormalizeNeon
+.global WebRtcNsx_CreateComplexBufferNeon
+
+@ void NoiseEstimationNeon(NsxInst_t* inst,
+@                          uint16_t* magn,
+@                          uint32_t* noise,
+@                          int16_t* q_noise);
+@ On return noise[] holds inst->noiseEstQuantile[] and *q_noise inst->qNoise.
+@ Register usage (across major loops of NoiseEstimationNeon()):
+@ r0-r3: function arguments, and scratch registers.
+@ r4: inst
+@ r5: &noiseEstLogQuantile[]
+@ r6: inst->magnLen
+@ r7: offset
+@ r8: s, the loop counter for LOOP_SIMULT
+@ r9: &inst->noiseEstDensity[]
+@ r10: &inst->noiseEstCounter[]
+@ r11: countDiv
+@ r12: i, the loop counter for LOOP_NOISEESTIMATION_MAGNLEN_INNER
+
+WebRtcNsx_NoiseEstimationNeon:
+.fnstart
+.save {r4-r11, r14}
+.vsave {d8-d15}
+.pad #(16 + (HALF_ANAL_BLOCKL + 3) / 4 * 8)
+
+  push {r4-r11, r14}
+  vpush {d8-d15}
+  sub sp, #(16 + (HALF_ANAL_BLOCKL + 3) / 4 * 8)
+
+@ [sp, #0]: logval
+@ [sp, #4]: noise
+@ [sp, #8]: q_noise
+@ [sp, #12]: factor
+@ [sp, #16 ~ #(16 + (HALF_ANAL_BLOCKL + 3) / 4 * 8)]: lmagn[HALF_ANAL_BLOCKL]
+
+  str r2, [sp, #4]            @ noise
+  str r3, [sp, #8]            @ q_noise
+  movw r4, #offset_nsx_normData
+  ldr r2, [r0, #offset_nsx_stages]            @ inst->stages
+  ldr r4, [r0, r4]            @ inst->normData
+  ldr r12, =WebRtcNsx_kLogTable
+  subs r3, r2, r4             @ tabind = inst->stages - inst->normData;
+  ldr r5, [r0, #offset_nsx_magnLen]            @ magnLen
+  rsblt r3, #0                @ If tabind < 0, index the table with -tabind.
+  lsl r3, #1                  @ Byte offset into the 16-bit table.
+  ldrh r3, [r12, r3]          @ logval = WebRtcNsx_kLogTable[tabind];
+  add r12, sp, #16            @ lmagn[]
+  rsblt r3, #0                @ logval = -WebRtcNsx_kLogTable[-tabind];
+  str r3, [sp]
+  vdup.16 q15, r3             @ q15: logval in every 16-bit lane.
+
+  ldr r9, =WebRtcNsx_kLogTableFrac
+
+LOOP_SET_LMAGN:
+  ldrh r2, [r1], #2           @ magn[i]
+  cmp r2, #0
+  streqh r3, [r12], #2        @ lmagn[i] = logval;
+  beq CHECK_LMAGN_COUNTER
+
+  clz r6, r2
+  mov r4, r6                  @ zeros
+  rsb r6, #31                 @ 31 - zeros
+  lsl r2, r4                  @ magn[i] << zeros
+  ubfx r4, r2, #23, #8        @ frac
+  mov r2, r4, lsl #1
+  ldrh r4, [r9, r2]           @ WebRtcNsx_kLogTableFrac[frac]
+  add r7, r4, r6, lsl #8      @ log2
+  movw r2, #22713             @ log2_const
+  smulbb r2, r7, r2
+  add r2, r3, r2, lsr #15     @ logval + ((log2 * log2_const) >> 15)
+  strh r2, [r12], #2          @ lmagn[i]
+
+CHECK_LMAGN_COUNTER:
+  subs r5, #1                 @ r5 started at magnLen.
+  bgt LOOP_SET_LMAGN
+
+  movw r3, #21845             @ width_factor
+  vdup.16 q5, r3
+  vmov.s16 q14, #WIDTH_Q8
+
+  movw r5, #offset_nsx_noiseEstLogQuantile
+  movw r7, #offset_nsx_blockIndex
+  movw r9, #offset_nsx_noiseEstDensity
+  add r5, r0                  @ &inst->noiseEstLogQuantile[0]
+  ldr r6, [r0, #offset_nsx_magnLen]
+  ldr r7, [r0, r7]            @ inst->blockIndex
+  add r9, r0                  @ &inst->noiseEstDensity[0]
+  cmp r7, #END_STARTUP_LONG
+  add r10, r0, #offset_nsx_noiseEstCounter
+  movge r7, #FACTOR_Q7
+  movlt r7, #FACTOR_Q7_STARTUP
+  mov r4, r0                  @ Keep inst in r4; r0 is reused as scratch.
+  str r7, [sp, #12]           @ factor
+  mov r8, #SIMULT
+  mov r7, #0                  @ offset = 0
+
+LOOP_SIMULT:
+  ldrsh r1, [r10]             @ inst->noiseEstCounter[s]
+  ldr r3, =WebRtcNsx_kCounterDiv
+  mov r11, r1, lsl #1         @ counter
+  ldrh r11, [r3, r11]         @ countDiv = WebRtcNsx_kCounterDiv[counter];
+  sub r12, r6, #1             @ Loop counter.
+  smulbb r3, r1, r11          @ countProd
+  vdup.16 q11, r11
+
+  vqrdmulh.s16 q11, q5, q11   @ WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
+                              @   width_factor, countDiv, 15);
+  vdup.16 d24, r11            @ d24: countDiv
+  vdup.16 d25, r3             @ d25: countProd
+
+  ldr r3, [sp, #12]           @ factor
+  add r1, sp, #16             @ &lmagn[0]
+  vdup.16 q9, r3
+  vmov.i16 q13, #512
+  vmov.i16 q7, #15
+  vmov.i32 q6, #FACTOR_Q16
+
+LOOP_NOISEESTIMATION_MAGNLEN_INNER:
+  vld1.16 {q0}, [r9]          @ noiseEstDensity[offset + i]
+
+  @ Compute delta in the next two blocks.
+  vclz.i16 q4, q0
+  vsub.i16 q4, q4, q7         @ Value of the shift factors; likely negative.
+  vmovl.s16 q3, d8
+  vmovl.s16 q2, d9
+
+  vshl.s32 q1, q6, q3
+  vmovn.i32 d8, q1            @ d8 holds shifted FACTOR_Q16.
+  vshl.s32 q1, q6, q2
+  vcgt.s16 q3, q0, q13        @ Compare noiseEstDensity to 512.
+  vmovn.i32 d9, q1            @ d9 holds shifted FACTOR_Q16.
+  vmov.i16 q1, q9             @ Default delta = factor.
+  vbit.s16 q1, q4, q3         @ If bigger than 512, delta = shifted FACTOR_Q16.
+
+  vmull.s16 q8, d3, d24
+  vmull.s16 q4, d2, d24
+  vshrn.i32 d2, q4, #14       @ (delta * countDiv) >> 14
+  vshrn.i32 d3, q8, #14
+
+  vrshr.s16 q3, q1, #1
+  vrshr.s16 q8, q1, #2
+  vmull.s16 q4, d7, d28
+  vmull.s16 q3, d6, d28
+  vld1.16 {q10}, [r5]         @ inst->noiseEstLogQuantile[offset + i]
+  vshrn.i32 d4, q3, #1
+  vshrn.i32 d5, q4, #1
+
+  vld1.16 {q3}, [r1]!         @ lmagn[i]
+  vsub.i16 q4, q10, q2
+  vadd.i16 q8, q10, q8
+  vsub.i16 q2, q3, q10
+  vmax.s16 q4, q4, q15        @ Lower bound: logval.
+  vcgt.s16 q1, q2, #0
+  vbit q10, q8, q1
+  vbif q10, q4, q1
+
+  vsub.i16 q1, q3, q10
+  vst1.16 {q10}, [r5]!        @ inst->noiseEstLogQuantile[offset + i]
+  vabs.s16 q4, q1
+  vqrdmulh.s16 d2, d0, d25
+  vqrdmulh.s16 d3, d1, d25
+  vcgt.s16 q4, q14, q4
+  vadd.i16 q1, q1, q11
+  vbit q0, q1, q4
+  subs r12, #8                @ Eight elements per pass.
+  vst1.16 {q0}, [r9]!         @ noiseEstDensity[offset + i]
+  bgt LOOP_NOISEESTIMATION_MAGNLEN_INNER
+
+@
+@ Last iteration over magnitude spectrum (scalar code, one element).
+@
+
+COMPUTE_DELTA:
+  ldrsh r2, [r9]              @ inst->noiseEstDensity[offset + i]
+  cmp r2, #512
+  bgt COMPUTE_DELTA_BIGGER_DENSITY
+
+  movw r2, #offset_nsx_blockIndex
+  ldr r0, [r4, r2]
+  cmp r0, #END_STARTUP_LONG
+  movge r0, #FACTOR_Q7          @ delta
+  movlt r0, #FACTOR_Q7_STARTUP  @ delta
+  b UPDATE_LOG_QUANTILE_ESTIMATE
+
+COMPUTE_DELTA_BIGGER_DENSITY:
+  clz r2, r2
+  rsb r0, r2, #31             @ 14 - factor
+  mov r2, #FACTOR_Q16
+  mov r0, r2, lsr r0          @ FACTOR_Q16 >> (14 - factor)
+
+UPDATE_LOG_QUANTILE_ESTIMATE:
+  smulbb r12, r0, r11         @ delta * countDiv
+  ldrsh r1, [r1]              @ lmagn[i]
+  ubfx r12, r12, #14, #16     @ tmp16
+  ldrsh r2, [r5]              @ inst->noiseEstLogQuantile[offset + i]
+  cmp r1, r2
+  bgt UPDATE_LOG_QUANTILE_ESTIMATE_BIGGER_LMAGN
+
+  add r12, #1
+  ldr r3, [sp]                @ logval
+  mov r0, r12, lsr #1         @ tmp16no1
+  mov r12, #3
+  smulbb r12, r0, r12         @ tmp16no2
+  sub r2, r12, lsr #1
+  cmp r3, r2
+  ldrgt r2, [sp]              @ Clamp the new estimate to logval.
+  ldrgt r3, [sp]
+  b UPDATE_LOG_QUANTILE_ESTIMATE_STORE
+
+UPDATE_LOG_QUANTILE_ESTIMATE_BIGGER_LMAGN:
+  add r3, r12, #2
+  add r2, r3, lsr #2          @ += (tmp16 + 2) >> 2
+
+UPDATE_LOG_QUANTILE_ESTIMATE_STORE:
+  vmov.s16 r0, d25[0]         @ countProd
+  strh r2, [r5]
+  add r5, #2                  @ increment &noiseEstLogQuantile[offset + i]
+
+UPDATE_DENSITY_ESTIMATE:
+  subs r12, r1, r2
+  rsblt r12, #0               @ |lmagn[i] - noiseEstLogQuantile[offset + i]|
+  cmp r12, #WIDTH_Q8
+  bge UPDATE_DENSITY_ESTIMATE_CHECK_COUNTER
+
+  movw r3, #21845             @ width_factor
+  ldrh r12, [r9]              @ inst->noiseEstDensity[offset + i]
+  smulbb r2, r3, r11
+  smulbb r1, r12, r0
+  add r0, r2, #1 << 14        @ Rounding
+  add r12, r1, #1 << 14
+  mov r1, r12, lsr #15
+  add r3, r1, r0, lsr #15
+  strh r3, [r9]               @ inst->noiseEstDensity[offset + i]
+
+UPDATE_DENSITY_ESTIMATE_CHECK_COUNTER:
+  add r9, #2                  @ Update &noiseEstDensity[offset + i]
+  ldrsh r3, [r10]             @ inst->noiseEstCounter[s]
+  cmp r3, #END_STARTUP_LONG
+  blt POST_UPDATE_DENSITY_ESTIMATE
+
+  movw r2, #offset_nsx_blockIndex
+  mov r12, #0
+  ldr r2, [r4, r2]
+  strh r12, [r10]             @ inst->noiseEstCounter[s] = 0
+  cmp r2, #END_STARTUP_LONG
+  blt POST_UPDATE_DENSITY_ESTIMATE
+  @ NOTE(review): q14/q15 stay live across this call; confirm callees keep them.
+  mov r0, r4
+  mov r1, r7                  @ offset
+  bl UpdateNoiseEstimateNeon
+
+POST_UPDATE_DENSITY_ESTIMATE:
+  ldrh r3, [r10]
+  add r3, #1
+  strh r3, [r10], #2
+  subs r8, #1
+  add r7, r6                  @ offset += inst->magnLen;
+  bgt LOOP_SIMULT
+
+  movw r2, #offset_nsx_blockIndex
+  ldr r2, [r4, r2]
+  cmp r2, #END_STARTUP_LONG
+  bge UPDATE_NOISE
+
+  sub r1, r7, r6              @ offset - inst->magnLen
+  mov r0, r4
+  bl UpdateNoiseEstimateNeon
+
+UPDATE_NOISE:
+  movw r1, #offset_nsx_noiseEstQuantile
+  add r1, r4
+  ldr r2, [sp, #4]            @ noise
+
+@ Initial value of loop counter r6 = inst->magnLen.
+LOOP_UPDATE_NOISE:
+  ldrsh r0, [r1], #2          @ inst->noiseEstQuantile[i]
+  subs r6, #1
+  str r0, [r2], #4            @ noise[i]
+  bgt LOOP_UPDATE_NOISE
+
+UPDATE_Q_NOISE:
+  movw r2, #offset_nsx_qNoise
+  ldr r1, [sp, #8]            @ q_noise
+  ldrh r2, [r4, r2]           @ inst->qNoise
+  strh r2, [r1]               @ *q_noise
+
+  add sp, #(16 + (HALF_ANAL_BLOCKL + 3) / 4 * 8)
+  vpop {d8-d15}
+  pop {r4-r11, pc}
+.fnend
+
+@ static void UpdateNoiseEstimateNeon(NsxInst_t* inst, int offset);
+@ Neon registers touched here: q0-q3, q8-q13 (plus WebRtcSpl_MaxValueW16's).
+UpdateNoiseEstimateNeon:
+.fnstart
+.save {r4, r5, r6, r14}
+
+  push {r4, r5, r6, r14}
+  mov r5, r0                  @ inst
+
+  vmov.i32 q10, #21
+  vmov.i32 q11, #0x1FFFFF
+  vmov.i32 q9, #0x200000
+
+  movw r0, #offset_nsx_noiseEstLogQuantile
+  movw r6, #offset_nsx_magnLen
+  add r0, r5                  @ &inst->noiseEstLogQuantile
+  add r4, r0, r1, lsl #1      @ &inst->noiseEstLogQuantile[offset]
+  ldrsh r6, [r5, r6]          @ inst->magnLen
+
+  mov r0, r4
+  mov r1, r6
+  bl WebRtcSpl_MaxValueW16
+
+  sub r12, r6, #1             @ Loop counter: inst->magnLen - 1.
+
+  movw r6, #11819             @ kExp2Const in Q13
+  movw r2, #offset_nsx_noiseEstQuantile
+  vdup.16 d16, r6
+  smulbb r3, r6, r0           @ kExp2Const * tmp16 (r0: value returned above)
+  add r0, r3, #1 << 20        @ Round
+  movw r1, #offset_nsx_qNoise
+  mov r0, r0, lsr #21
+  rsb r0, r0, #14             @ 14 - (round(kExp2Const * tmp16) >> 21)
+  add r2, r5                  @ &inst->noiseEstQuantile
+  vdup.32 q13, r0
+  str r0, [r5, r1]            @ inst->qNoise
+
+@ Per lane, p = kExp2Const * noiseEstLogQuantile[offset + i]; the result is
+@ sat16((0x200000 | (p & 0x1FFFFF)) << ((p >> 21) - 21 + qNoise)).
+LOOP_UPDATE:
+  vld1.16 {d0, d1}, [r4]!     @ &inst->noiseEstLogQuantile[offset + i]
+  vmull.s16 q1, d0, d16
+  vmull.s16 q0, d1, d16
+  vshr.s32 q3, q1, #21
+  vshr.s32 q2, q0, #21
+  vand q1, q1, q11
+  vand q0, q0, q11
+  vsub.i32 q3, q3, q10
+  vsub.i32 q2, q2, q10
+  vorr q1, q1, q9
+  vorr q0, q0, q9
+  vadd.i32 q3, q3, q13
+  vadd.i32 q2, q2, q13
+  vshl.s32 q1, q1, q3
+  vshl.s32 q0, q0, q2
+  vqmovn.s32 d1, q0           @ d1 first: q0 (d0:d1) is still the source here.
+  vqmovn.s32 d0, q1
+  subs r12, #8
+  vst1.16 {d0, d1}, [r2]!     @ inst->noiseEstQuantile[i]
+  bgt LOOP_UPDATE
+
+POST_LOOP_MAGNLEN:
+  ldrh r1, [r4]
+  smulbb r3, r6, r1           @ kExp2Const * ptr_noiseEstLogQuantile[offset + i]
+  mov r12, #0x00200000
+  bfi r12, r3, #0, #21        @ tmp32no1 = 0x00200000 | (tmp32no2 & 0x001FFFFF);
+  rsb r0, #21                 @ 21 - inst->qNoise
+  sub r14, r0, r3, lsr #21    @ -tmp16
+  mov r0, r12, lsr r14
+  ssat r3, #16, r0
+  strh r3, [r2]
+
+  pop {r4, r5, r6, pc}
+.fnend
+
+@ void PrepareSpectrumNeon(NsxInst_t* inst, int16_t* freq_buf);
+WebRtcNsx_PrepareSpectrumNeon:
+.fnstart
+.save {r4-r9}
+@ r9 is written below and is callee-saved, so it is saved along with r4-r8.
+  push {r4-r9}
+
+  movw r2, #offset_nsx_real
+  movw r12, #offset_nsx_noiseSupFilter
+  movw r4, #offset_nsx_imag
+  movw r5, #offset_nsx_magnLen
+
+  add r2, r0                  @ &inst->real[0]
+  add r4, r0                  @ &inst->imag[0]
+  mov r9, r4                  @ &inst->imag[0]
+  mov r3, r2                  @ &inst->real[0]
+  ldr r5, [r0, r5]            @ inst->magnLen
+  add r6, r4, #2              @ &inst->imag[1]
+  sub r5, #1
+  add r12, r0                 @ &inst->noiseSupFilter[0]
+  add r5, r2, r5, lsl #1      @ &inst->real[inst->magnLen - 1]
+
+LOOP_MAGNLEN:
+  @ Filter the elements.
+  vld1.16 {d20, d21}, [r2]    @ inst->real[]
+  vld1.16 {d24, d25}, [r12]!  @ inst->noiseSupFilter[]
+  vld1.16 {d22, d23}, [r4]    @ inst->imag[]
+  vmull.s16 q0, d20, d24
+  vmull.s16 q1, d21, d25
+  vmull.s16 q2, d22, d24
+  vmull.s16 q3, d23, d25
+  vshrn.s32 d0, q0, #14
+  vshrn.s32 d1, q1, #14
+  vshrn.s32 d2, q2, #14
+  vshrn.s32 d3, q3, #14
+  vst1.16 {d0, d1}, [r2]!
+  vst1.16 {d2, d3}, [r4]!
+  cmp r2, r5
+  bcc LOOP_MAGNLEN
+
+  @ Last two elements to filter (one of real[], one of imag[]):
+  ldrh r7, [r2]
+  ldrh r8, [r12]
+  ldrh r5, [r4]
+  smulbb r7, r7, r8
+  smulbb r5, r5, r8
+  mov r7, r7, lsr #14
+  mov r8, r5, lsr #14
+  strh r7, [r2]
+  strh r8, [r4]
+
+  ldr r5, [r0, #offset_nsx_anaLen2]            @ inst->anaLen2
+  ldr r7, [r0, #offset_nsx_anaLen]            @ inst->anaLen
+  add r5, r3, r5, lsl #1      @ &inst->real[inst->anaLen2]
+
+  ldrh r2, [r3], #2           @ inst->real[0]
+  ldrh r0, [r9]               @ inst->imag[0]
+  strh r2, [r1], #2           @ Store to freq_buf[0]
+  rsb r0, r0, #0
+  strh r0, [r1], #2           @ Store to freq_buf[1]. Now r1 -> &freq_buf[2]
+
+  add r2, r1, r7, lsl #2
+  sub r2, #36                 @ &freq_buf[2 * inst->anaLen - 16]
+
+  mvn r12, #0x1F              @ -32
+
+@ At the last iteration, &freq_buf[inst->anaLen + 1] will be written to by both
+@ the vst1 instructions. Only the 2nd vst1 instruction has the correct value
+@ (-inst->imag[inst->anaLen2]), so the order of the two vst1's is important.
+LOOP_ANALEN2:
+  vld1.16 {d0, d1}, [r3]!     @ inst->real[], starting from inst->real[1]
+  vld1.16 {d2, d3}, [r6]!     @ inst->imag[], starting from inst->imag[1]
+  vmov.s16 d4, d0
+  vmov.s16 d6, d1
+  vneg.s16 d5, d2
+  vneg.s16 d7, d3
+  vzip.16 d0, d2
+  vzip.16 d1, d3
+  vzip.16 d4, d5
+  vzip.16 d6, d7
+  vrev64.32 d16, d3
+  vrev64.32 d17, d1
+  vrev64.32 d18, d2
+  vrev64.32 d19, d0
+  cmp r3, r5
+  vst1.16 {d16, d17, d18, d19}, [r2], r12
+  vst1.16 {d4, d5, d6, d7}, [r1]!
+  bls LOOP_ANALEN2
+
+  pop {r4-r9}
+  bx r14
+.fnend
+
+@ void WebRtcNsx_DenormalizeNeon(NsxInst_t* inst, int16_t* in, int factor);
+WebRtcNsx_DenormalizeNeon:
+.fnstart
+  movw r12, #offset_nsx_normData
+  movw r3, #offset_nsx_real
+  ldr r12, [r0, r12]          @ inst->normData
+  add r3, r0                  @ &inst->real[0]
+  sub r2, r12                 @ factor - inst->normData
+  vdup.32 q10, r2             @ Per-lane shift; negative means shift right.
+
+  movw r2, #offset_nsx_anaLen
+  ldrsh r2, [r0, r2]          @ inst->anaLen
+  add r0, r3, r2, lsl #1      @ &inst->real[inst->anaLen]
+
+LOOP_ANALEN:
+  vld2.16 {d0, d1}, [r1]!     @ &in[]; d0 gets the even-indexed elements.
+  vld2.16 {d2, d3}, [r1]!     @ &in[]; d2 gets the even-indexed elements.
+  vmovl.s16 q2, d0
+  vmovl.s16 q3, d2
+  vshl.s32 q2, q10
+  vshl.s32 q3, q10
+  vqmovn.s32 d0, q2           @ Saturate back to 16 bits.
+  vqmovn.s32 d1, q3
+  vst1.16 {d0, d1}, [r3]!     @ inst->real[]
+  cmp r3, r0
+  blo LOOP_ANALEN             @ Unsigned: r3 and r0 are addresses.
+
+  bx r14
+.fnend
+
+@ void SynthesisUpdateNeon(NsxInst_t* inst,
+@                          int16_t* out_frame,
+@                          int16_t gain_factor);
+WebRtcNsx_SynthesisUpdateNeon:
+.fnstart
+.save {r4, r5}
+  push {r4, r5}
+
+  vdup.16 d31, r2             @ gain_factor in every 16-bit lane.
+
+  movw r2, #offset_nsx_anaLen
+  movw r4, #offset_nsx_real
+  movw r12, #offset_nsx_synthesisBuffer
+
+  ldrsh r5, [r0, r2]          @ inst->anaLen
+  add r12, r0                 @ &inst->synthesisBuffer[0];
+  ldr r3, [r0, #offset_nsx_window]            @ &inst->window[0]
+  add r4, r0                  @ &inst->real[0]
+  add r5, r12, r5, lsl #1     @ &inst->synthesisBuffer[inst->anaLen]
+
+  mov r2, r12                 @ &inst->synthesisBuffer[0];
+
+LOOP_SYNTHESIS:
+  vld1.16 {d0, d1}, [r4]!     @ inst->real[]
+  vld1.16 {d2, d3}, [r3]!     @ inst->window[]
+  vld1.16 {d4, d5}, [r2]      @ inst->synthesisBuffer[];
+  vmull.s16 q3, d0, d2
+  vmull.s16 q8, d1, d3
+  vrshrn.i32 d0, q3, #14
+  vrshrn.i32 d1, q8, #14
+  vmull.s16 q3, d31, d0
+  vmull.s16 q8, d31, d1
+  vqrshrn.s32 d0, q3, #13
+  vqrshrn.s32 d1, q8, #13
+  vqadd.s16 d4, d0
+  vqadd.s16 d5, d1
+  vst1.16 {d4, d5}, [r2]!
+  cmp r2, r5
+  blo LOOP_SYNTHESIS          @ Unsigned: r2 and r5 are addresses.
+
+POST_LOOP_SYNTHESIS:
+  movw r3, #offset_nsx_blockLen10ms
+  ldr r2, [r0, r3]
+  mov r3, r12                 @ &inst->synthesisBuffer[0];
+  add r0, r12, r2, lsl #1     @ &inst->synthesisBuffer[inst->blockLen10ms]
+
+LOOP_BLOCKLEN10MS:
+  vld1.16 {q0, q1}, [r3]!     @ inst->synthesisBuffer[];
+  cmp r3, r0
+  vst1.16 {q0, q1}, [r1]!     @ out_frame[]
+  blo LOOP_BLOCKLEN10MS       @ Unsigned address compare (flags from cmp).
+
+  cmp r0, r5
+  bhs POST_LOOP_MEMCPY        @ Unsigned address compare.
+
+LOOP_MEMCPY:
+  vld1.16 {q0, q1}, [r0]!     @ inst->synthesisBuffer[i + inst->blockLen10ms]
+  cmp r0, r5
+  vst1.16 {q0, q1}, [r12]!    @ inst->synthesisBuffer[i]
+  blo LOOP_MEMCPY             @ Unsigned address compare (flags from cmp).
+
+POST_LOOP_MEMCPY:
+  cmp r12, r5
+  vmov.i16 q10, #0
+  vmov.i16 q11, #0
+  bhs EXIT_SYNTHESISUPDATE    @ Unsigned address compare (flags from cmp).
+
+LOOP_ZEROSARRAY:
+  vst1.16 {q10, q11}, [r12]!  @ inst->synthesisBuffer[i + inst->anaLen]
+  cmp r12, r5
+  blo LOOP_ZEROSARRAY         @ Unsigned address compare.
+
+EXIT_SYNTHESISUPDATE:
+  pop {r4, r5}
+  bx r14
+
+.fnend
+
+@ void AnalysisUpdateNeon(NsxInst_t* inst, int16_t* out, int16_t* new_speech);
+WebRtcNsx_AnalysisUpdateNeon:
+.fnstart
+.save {r4-r6}
+  push {r4-r6}
+
+  movw r3, #offset_nsx_analysisBuffer
+  movw r4, #offset_nsx_anaLen
+  movw r12, #offset_nsx_blockLen10ms
+  add r3, r0                  @ &inst->analysisBuffer[0]
+  ldrsh r4, [r0, r4]          @ inst->anaLen
+  ldr r12, [r0, r12]          @ inst->blockLen10ms
+  sub r6, r4, r12
+  add r6, r3, r6, lsl #1      @ &inst->analysisBuffer[inst->anaLen
+                              @     - inst->blockLen10ms]
+  cmp r3, r6
+  mov r5, r3
+  bhs POST_LOOP_MEMCPY_1      @ Unsigned address compare (flags from cmp).
+
+  add r12, r3, r12, lsl #1    @ &inst->analysisBuffer[inst->blockLen10ms]
+
+LOOP_MEMCPY_1:
+  vld1.16 {q10, q11}, [r12]!  @ inst->analysisBuffer[i + inst->blockLen10ms]
+  vst1.16 {q10, q11}, [r5]!   @ inst->analysisBuffer[i]
+  cmp r5, r6
+  blo LOOP_MEMCPY_1           @ Unsigned address compare.
+
+POST_LOOP_MEMCPY_1:
+  add r12, r3, r4, lsl #1     @ &inst->analysisBuffer[inst->anaLen]
+  cmp r5, r12
+  bhs POST_LOOP_MEMCPY_2      @ Unsigned address compare.
+
+LOOP_MEMCPY_2:
+  vld1.16 {q10, q11}, [r2]!   @ new_speech[i]
+  vst1.16 {q10, q11}, [r5]!   @ inst->analysisBuffer[
+                              @     i + inst->anaLen - inst->blockLen10ms]
+  cmp r5, r12
+  blo LOOP_MEMCPY_2           @ Unsigned address compare.
+
+POST_LOOP_MEMCPY_2:
+  add r4, r1, r4, lsl #1      @ &out[inst->anaLen]
+  cmp r1, r4
+  ldr r2, [r0, #offset_nsx_window]            @ &inst->window[0]
+  bhs POST_LOOP_WINDOW_DATA   @ Unsigned address compare (flags from cmp).
+
+LOOP_WINDOW_DATA:
+  vld1.16 {d4, d5}, [r3]!     @ inst->analysisBuffer[]
+  vld1.16 {d6, d7}, [r2]!     @ inst->window[]
+  vmull.s16 q0, d4, d6
+  vmull.s16 q1, d5, d7
+  vrshrn.i32 d4, q0, #14
+  vrshrn.i32 d5, q1, #14
+  vst1.16 {d4, d5}, [r1]!     @ out[]
+  cmp r1, r4
+  blo LOOP_WINDOW_DATA        @ Unsigned address compare.
+
+POST_LOOP_WINDOW_DATA:
+  pop {r4-r6}
+  bx r14
+.fnend
+
+@ void CreateComplexBufferNeon(NsxInst_t* inst, int16_t* in, int16_t* out);
+WebRtcNsx_CreateComplexBufferNeon:
+.fnstart
+  movw r3, #offset_nsx_anaLen
+  movw r12, #offset_nsx_normData
+  ldrsh r3, [r0, r3]                  @ inst->anaLen
+  ldr r12, [r0, r12]                  @ inst->normData
+  add r3, r1, r3, lsl #1              @ &in[inst->anaLen]
+
+  vmov.i16 d7, #0                     @ For writing to imaginary parts.
+  vmov.i16 d5, #0                     @ For writing to imaginary parts.
+  vdup.i16 q10, r12                   @ Shift each input by inst->normData.
+
+LOOP_CREATE_COMPLEX_BUFFER:           @ Unrolled by 16.
+  vld1.16 {d0, d1, d2, d3}, [r1]!     @ in[]
+  cmp r1, r3
+  vshl.s16 q0, q10
+  vshl.s16 q1, q10
+  vmov d4, d1
+  vmov d1, d5                         @ d1 = 0 (imaginary parts).
+  vmov d6, d3
+  vmov d3, d7                         @ d3 = 0 (imaginary parts).
+  vst2.16 {d0, d1}, [r2]!             @ out[]: inputs interleaved with zeros.
+  vst2.16 {d4, d5}, [r2]!
+  vst2.16 {d2, d3}, [r2]!
+  vst2.16 {d6, d7}, [r2]!
+  blo LOOP_CREATE_COMPLEX_BUFFER      @ Unsigned address compare (cmp above).
+
+  bx r14
+.fnend
--- a/src/modules/audio_processing/ns/nsx_core_neon.c
+++ b/src/modules/audio_processing/ns/nsx_core_neon.c
@ -91,10 +91,10 @@ static void UpdateNoiseEstimateNeon(NsxInst_t* inst, int offset) {
 }

 // Noise Estimation
-static void NoiseEstimationNeon(NsxInst_t* inst,
-                                uint16_t* magn,
-                                uint32_t* noise,
-                                int16_t* q_noise) {
+void WebRtcNsx_NoiseEstimationNeon(NsxInst_t* inst,
+                                   uint16_t* magn,
+                                   uint32_t* noise,
+                                   int16_t* q_noise) {
  int16_t lmagn[HALF_ANAL_BLOCKL], counter, countDiv;
  int16_t countProd, delta, zeros, frac;
  int16_t log2, tabind, logval, tmp16, tmp16no1, tmp16no2;
@ -320,7 +320,7 @@ static void NoiseEstimationNeon(NsxInst_t* inst,
 }

 // Filter the data in the frequency domain, and create spectrum.
-static void PrepareSpectrumNeon(NsxInst_t* inst, int16_t* freq_buf) {
+void WebRtcNsx_PrepareSpectrumNeon(NsxInst_t* inst, int16_t* freq_buf) {

  // (1) Filtering.

@ -455,7 +455,7 @@ static void PrepareSpectrumNeon(NsxInst_t* inst, int16_t* freq_buf) {
 }

 // Denormalize the input buffer.
-static __inline void DenormalizeNeon(NsxInst_t* inst, int16_t* in, int factor) {
+void WebRtcNsx_DenormalizeNeon(NsxInst_t* inst, int16_t* in, int factor) {
  int16_t* ptr_real = &inst->real[0];
  int16_t* ptr_in = &in[0];

@ -494,9 +494,9 @@ static __inline void DenormalizeNeon(NsxInst_t* inst, int16_t* in, int factor) {

 // For the noise supress process, synthesis, read out fully processed segment,
 // and update synthesis buffer.
-static void SynthesisUpdateNeon(NsxInst_t* inst,
-                                int16_t* out_frame,
-                                int16_t gain_factor) {
+void WebRtcNsx_SynthesisUpdateNeon(NsxInst_t* inst,
+                                   int16_t* out_frame,
+                                   int16_t gain_factor) {
  int16_t* ptr_real = &inst->real[0];
  int16_t* ptr_syn = &inst->synthesisBuffer[0];
  const int16_t* ptr_window = &inst->window[0];
@ -605,9 +605,9 @@ static void SynthesisUpdateNeon(NsxInst_t* inst,
 }

 // Update analysis buffer for lower band, and window data before FFT.
-static void AnalysisUpdateNeon(NsxInst_t* inst,
-                               int16_t* out,
-                               int16_t* new_speech) {
+void WebRtcNsx_AnalysisUpdateNeon(NsxInst_t* inst,
+                                  int16_t* out,
+                                  int16_t* new_speech) {

  int16_t* ptr_ana = &inst->analysisBuffer[inst->blockLen10ms];
  int16_t* ptr_out = &inst->analysisBuffer[0];
@ -682,9 +682,9 @@ static void AnalysisUpdateNeon(NsxInst_t* inst,

 // Create a complex number buffer (out[]) as the intput (in[]) interleaved with
 // zeros, and normalize it.
-static __inline void CreateComplexBufferNeon(NsxInst_t* inst,
-                                             int16_t* in,
-                                             int16_t* out) {
+void WebRtcNsx_CreateComplexBufferNeon(NsxInst_t* inst,
+                                       int16_t* in,
+                                       int16_t* out) {
  int16_t* ptr_out = &out[0];
  int16_t* ptr_in = &in[0];

@ -723,12 +723,3 @@ static __inline void CreateComplexBufferNeon(NsxInst_t* inst,
    );
  }
 }
-
-void WebRtcNsx_InitNeon(void) {
-  WebRtcNsx_NoiseEstimation = NoiseEstimationNeon;
-  WebRtcNsx_PrepareSpectrum = PrepareSpectrumNeon;
-  WebRtcNsx_SynthesisUpdate = SynthesisUpdateNeon;
-  WebRtcNsx_AnalysisUpdate = AnalysisUpdateNeon;
-  WebRtcNsx_Denormalize = DenormalizeNeon;
-  WebRtcNsx_CreateComplexBuffer = CreateComplexBufferNeon;
-}
--- a/src/modules/audio_processing/ns/nsx_core_neon_offsets.c
+++ b/src/modules/audio_processing/ns/nsx_core_neon_offsets.c
@ -0,0 +1,34 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "nsx_core.h"
+
+#include <stddef.h>
+
+// Byte offsets of NsxInst_t members, defined as global ints so that this file
+// can be compiled to assembly and each value turned into a #define for .S code.
+int offset_nsx_anaLen = offsetof(NsxInst_t, anaLen);
+int offset_nsx_anaLen2 = offsetof(NsxInst_t, anaLen2);
+int offset_nsx_normData = offsetof(NsxInst_t, normData);
+int offset_nsx_analysisBuffer = offsetof(NsxInst_t, analysisBuffer);
+int offset_nsx_synthesisBuffer = offsetof(NsxInst_t, synthesisBuffer);
+int offset_nsx_blockLen10ms = offsetof(NsxInst_t, blockLen10ms);
+int offset_nsx_window = offsetof(NsxInst_t, window);
+int offset_nsx_real = offsetof(NsxInst_t, real);
+int offset_nsx_imag = offsetof(NsxInst_t, imag);
+int offset_nsx_noiseSupFilter = offsetof(NsxInst_t, noiseSupFilter);
+int offset_nsx_magnLen = offsetof(NsxInst_t, magnLen);
+int offset_nsx_noiseEstLogQuantile = offsetof(NsxInst_t, noiseEstLogQuantile);
+int offset_nsx_noiseEstQuantile = offsetof(NsxInst_t, noiseEstQuantile);
+int offset_nsx_qNoise = offsetof(NsxInst_t, qNoise);
+int offset_nsx_stages = offsetof(NsxInst_t, stages);
+int offset_nsx_blockIndex = offsetof(NsxInst_t, blockIndex);
+int offset_nsx_noiseEstCounter = offsetof(NsxInst_t, noiseEstCounter);
+int offset_nsx_noiseEstDensity = offsetof(NsxInst_t, noiseEstDensity);
--- a/src/modules/audio_processing/ns/nsx_defines.h
+++ b/src/modules/audio_processing/ns/nsx_defines.h
@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
@ -11,49 +11,53 @@
 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_DEFINES_H_
 #define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_DEFINES_H_

-#define ANAL_BLOCKL_MAX         256 // max analysis block length
-#define HALF_ANAL_BLOCKL        129 // half max analysis block length + 1
+#define ANAL_BLOCKL_MAX         256 /* Max analysis block length */
+#define HALF_ANAL_BLOCKL        129 /* Half max analysis block length + 1 */
 #define SIMULT                  3
 #define END_STARTUP_LONG        200
 #define END_STARTUP_SHORT       50
-#define FACTOR_Q16              (WebRtc_Word32)2621440 // 40 in Q16
-#define FACTOR_Q7               (WebRtc_Word16)5120 // 40 in Q7
-#define FACTOR_Q7_STARTUP       (WebRtc_Word16)1024 // 8 in Q7
-#define WIDTH_Q8                3 // 0.01 in Q8 (or 25 )
-//PARAMETERS FOR NEW METHOD
-#define DD_PR_SNR_Q11           2007 // ~= Q11(0.98) DD update of prior SNR
-#define ONE_MINUS_DD_PR_SNR_Q11 41 // DD update of prior SNR
-#define SPECT_FLAT_TAVG_Q14     4915 // (0.30) tavg parameter for spectral flatness measure
-#define SPECT_DIFF_TAVG_Q8      77 // (0.30) tavg parameter for spectral flatness measure
-#define PRIOR_UPDATE_Q14        1638 // Q14(0.1) update parameter of prior model
-#define NOISE_UPDATE_Q8         26 // 26 ~= Q8(0.1) update parameter for noise
-// probability threshold for noise state in speech/noise likelihood
-#define ONE_MINUS_PROB_RANGE_Q8 205 // 205 ~= Q8(0.8)
-#define HIST_PAR_EST            1000 // histogram size for estimation of parameters
-//FEATURE EXTRACTION CONFIG
-//bin size of histogram
+#define FACTOR_Q16              2621440 /* 40 in Q16 */
+#define FACTOR_Q7               5120 /* 40 in Q7 */
+#define FACTOR_Q7_STARTUP       1024 /* 8 in Q7 */
+#define WIDTH_Q8                3 /* 0.01 in Q8 (or 25 ) */
+
+/* PARAMETERS FOR NEW METHOD */
+#define DD_PR_SNR_Q11           2007 /* ~= Q11(0.98) DD update of prior SNR */
+#define ONE_MINUS_DD_PR_SNR_Q11 41 /* ~= Q11(1 - 0.98) DD update of prior SNR */
+#define SPECT_FLAT_TAVG_Q14     4915 /* ~= Q14(0.30) tavg parameter for spectral flatness measure */
+#define SPECT_DIFF_TAVG_Q8      77 /* ~= Q8(0.30) tavg parameter for spectral difference measure */
+#define PRIOR_UPDATE_Q14        1638 /* Q14(0.1) Update parameter of prior model */
+#define NOISE_UPDATE_Q8         26 /* 26 ~= Q8(0.1) Update parameter for noise */
+
+/* Probability threshold for noise state in speech/noise likelihood. */
+#define ONE_MINUS_PROB_RANGE_Q8 205 /* 205 ~= Q8(0.8) */
+#define HIST_PAR_EST            1000 /* Histogram size for estimation of parameters */
+
+/* FEATURE EXTRACTION CONFIG */
+/* Bin size of histogram */
 #define BIN_SIZE_LRT            10
-//scale parameters: multiply dominant peaks of the histograms by scale factor to obtain
-// thresholds for prior model
-#define FACTOR_1_LRT_DIFF       6 //for LRT and spectral difference (5 times bigger)
-//for spectral_flatness: used when noise is flatter than speech (10 times bigger)
+/* Scale parameters: multiply dominant peaks of the histograms by scale factor to obtain */
+/* thresholds for prior model. */
+#define FACTOR_1_LRT_DIFF       6 /* For LRT and spectral difference (5 times bigger) */
+/* For spectral_flatness: used when noise is flatter than speech (10 times bigger). */
 #define FACTOR_2_FLAT_Q10       922
-//peak limit for spectral flatness (varies between 0 and 1)
-#define THRES_PEAK_FLAT         24 // * 2 * BIN_SIZE_FLAT_FX
-//limit on spacing of two highest peaks in histogram: spacing determined by bin size
-#define LIM_PEAK_SPACE_FLAT_DIFF    4 // * 2 * BIN_SIZE_DIFF_FX
-//limit on relevance of second peak:
+/* Peak limit for spectral flatness (varies between 0 and 1) */
+#define THRES_PEAK_FLAT         24 /* * 2 * BIN_SIZE_FLAT_FX */
+/* Limit on spacing of two highest peaks in histogram: spacing determined by bin size. */
+#define LIM_PEAK_SPACE_FLAT_DIFF    4 /* * 2 * BIN_SIZE_DIFF_FX */
+/* Limit on relevance of second peak */
 #define LIM_PEAK_WEIGHT_FLAT_DIFF   2
-#define THRES_FLUCT_LRT         10240 //=20 * inst->modelUpdate; fluctuation limit of LRT feat.
-//limit on the max and min values for the feature thresholds
-#define MAX_FLAT_Q10            38912 //  * 2 * BIN_SIZE_FLAT_FX
-#define MIN_FLAT_Q10            4096 //  * 2 * BIN_SIZE_FLAT_FX
-#define MAX_DIFF                100 // * 2 * BIN_SIZE_DIFF_FX
-#define MIN_DIFF                16 // * 2 * BIN_SIZE_DIFF_FX
-//criteria of weight of histogram peak  to accept/reject feature
-#define THRES_WEIGHT_FLAT_DIFF  154//(int)(0.3*(inst->modelUpdate)) for flatness and difference
-//
-#define STAT_UPDATES            9 // Update every 512 = 1 << 9 block
-#define ONE_MINUS_GAMMA_PAUSE_Q8    13 // ~= Q8(0.05) update for conservative noise estimate
-#define GAMMA_NOISE_TRANS_AND_SPEECH_Q8 3 // ~= Q8(0.01) update for transition and noise region
-#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_DEFINES_H_
+#define THRES_FLUCT_LRT         10240 /* = 20 * inst->modelUpdate; fluctuation limit of LRT feat. */
+/* Limit on the max and min values for the feature thresholds */
+#define MAX_FLAT_Q10            38912 /* * 2 * BIN_SIZE_FLAT_FX */
+#define MIN_FLAT_Q10            4096 /* * 2 * BIN_SIZE_FLAT_FX */
+#define MAX_DIFF                100 /* * 2 * BIN_SIZE_DIFF_FX */
+#define MIN_DIFF                16 /* * 2 * BIN_SIZE_DIFF_FX */
+/* Criteria of weight of histogram peak to accept/reject feature */
+#define THRES_WEIGHT_FLAT_DIFF  154 /* (int)(0.3*(inst->modelUpdate)) for flatness and difference */
+
+#define STAT_UPDATES            9 /* Update every 512 = 1 << 9 blocks */
+#define ONE_MINUS_GAMMA_PAUSE_Q8    13 /* ~= Q8(0.05) Update for conservative noise estimate */
+#define GAMMA_NOISE_TRANS_AND_SPEECH_Q8 3 /* ~= Q8(0.01) Update for transition and noise region */
+
+#endif /* WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_DEFINES_H_ */