Support 48kHz in Noise Suppression

Doing the same for the 16-24 kHz band as was done for the 8-16 kHz band.
Results look and sound just as good.

BUG=webrtc:3146
R=andrew@webrtc.org, bjornv@webrtc.org, kwiberg@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/29139004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@7865 4adac7df-926f-26a2-2b94-8c16560cd09d
aluebs@webrtc.org 2014-12-10 19:30:57 +00:00
parent d8ca723de7
commit c5ebbd98f5
17 changed files with 240 additions and 219 deletions
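
A note for orientation: the API change threading through these 17 files replaces the fixed low/high band pair with an array of band pointers plus a band count. A minimal sketch of a call site under the new WebRtcNs_Process signature (the buffer names here are hypothetical):

// Before (32 kHz max, exactly two bands):
//   WebRtcNs_Process(handle, low_in, high_in, low_out, high_out);
// After (up to three 160-sample bands, enabling 48 kHz):
const float* in_bands[3] = {low_in, mid_in, high_in};  // hypothetical buffers
float* out_bands[3] = {low_out, mid_out, high_out};
WebRtcNs_Process(handle, in_bands, 3, out_bands);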

webrtc/modules/audio_processing/audio_buffer.cc

@ -65,6 +65,7 @@ AudioBuffer::AudioBuffer(int input_samples_per_channel,
proc_samples_per_channel_(process_samples_per_channel),
num_proc_channels_(num_process_channels),
output_samples_per_channel_(output_samples_per_channel),
num_bands_(1),
samples_per_split_channel_(proc_samples_per_channel_),
mixed_low_pass_valid_(false),
reference_copied_(false),
@ -111,6 +112,7 @@ AudioBuffer::AudioBuffer(int input_samples_per_channel,
if (proc_samples_per_channel_ == kSamplesPer32kHzChannel ||
proc_samples_per_channel_ == kSamplesPer48kHzChannel) {
samples_per_split_channel_ = kSamplesPer16kHzChannel;
num_bands_ = proc_samples_per_channel_ / samples_per_split_channel_;
split_channels_.push_back(new IFChannelBuffer(samples_per_split_channel_,
num_proc_channels_));
split_channels_.push_back(new IFChannelBuffer(samples_per_split_channel_,
@ -121,6 +123,8 @@ AudioBuffer::AudioBuffer(int input_samples_per_channel,
num_proc_channels_));
}
}
bands_.reset(new int16_t*[num_proc_channels_ * kMaxNumBands]);
bands_f_.reset(new float*[num_proc_channels_ * kMaxNumBands]);
}
AudioBuffer::~AudioBuffer() {}
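
For context, the band count falls straight out of the frame sizes: a 10 ms frame is split into 160-sample bands, so num_bands_ = proc_samples_per_channel_ / samples_per_split_channel_. A quick check of the arithmetic:

// 48 kHz: 480 samples / 160 per band = 3 bands (0-8, 8-16, 16-24 kHz)
// 32 kHz: 320 samples / 160 per band = 2 bands (0-8, 8-16 kHz)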
@ -216,14 +220,28 @@ int16_t* const* AudioBuffer::channels() {
return channels_->ibuf()->channels();
}
const int16_t* AudioBuffer::split_data_const(int channel, Band band) const {
const int16_t* const* chs = split_channels_const(band);
return chs ? chs[channel] : NULL;
const int16_t* const* AudioBuffer::split_bands_const(int channel) const {
// This is necessary to make sure that the int16_t data is up to date in the
// IFChannelBuffer.
// TODO(aluebs): Having to depend on this to get the updated data is bug
// prone. One solution is to have ChannelBuffer track the bands as well.
for (int i = 0; i < kMaxNumBands; ++i) {
int16_t* const* channels =
const_cast<int16_t* const*>(split_channels_const(static_cast<Band>(i)));
bands_[kMaxNumBands * channel + i] = channels ? channels[channel] : NULL;
}
return &bands_[kMaxNumBands * channel];
}
int16_t* AudioBuffer::split_data(int channel, Band band) {
int16_t* const* chs = split_channels(band);
return chs ? chs[channel] : NULL;
int16_t* const* AudioBuffer::split_bands(int channel) {
mixed_low_pass_valid_ = false;
// This is necessary to make sure that the int16_t data is up to date and the
// float data is marked as invalid in the IFChannelBuffer.
for (int i = 0; i < kMaxNumBands; ++i) {
int16_t* const* channels = split_channels(static_cast<Band>(i));
bands_[kMaxNumBands * channel + i] = channels ? channels[channel] : NULL;
}
return &bands_[kMaxNumBands * channel];
}
const int16_t* const* AudioBuffer::split_channels_const(Band band) const {
@ -260,14 +278,28 @@ float* const* AudioBuffer::channels_f() {
return channels_->fbuf()->channels();
}
const float* AudioBuffer::split_data_const_f(int channel, Band band) const {
const float* const* chs = split_channels_const_f(band);
return chs ? chs[channel] : NULL;
const float* const* AudioBuffer::split_bands_const_f(int channel) const {
// This is necessary to make sure that the float data is up to date in the
// IFChannelBuffer.
for (int i = 0; i < kMaxNumBands; ++i) {
float* const* channels =
const_cast<float* const*>(split_channels_const_f(static_cast<Band>(i)));
bands_f_[kMaxNumBands * channel + i] = channels ? channels[channel] : NULL;
}
return &bands_f_[kMaxNumBands * channel];
}
float* AudioBuffer::split_data_f(int channel, Band band) {
float* const* chs = split_channels_f(band);
return chs ? chs[channel] : NULL;
float* const* AudioBuffer::split_bands_f(int channel) {
mixed_low_pass_valid_ = false;
// This is necessary to make sure that the float data is up to date and the
// int16_t data is marked as invalid in the IFChannelBuffer.
for (int i = 0; i < kMaxNumBands; ++i) {
float* const* channels = split_channels_f(static_cast<Band>(i));
bands_f_[kMaxNumBands * channel + i] = channels ? channels[channel] : NULL;
}
return &bands_f_[kMaxNumBands * channel];
}
const float* const* AudioBuffer::split_channels_const_f(Band band) const {
@ -292,7 +324,7 @@ const int16_t* AudioBuffer::mixed_low_pass_data() {
assert(num_proc_channels_ == 1 || num_proc_channels_ == 2);
if (num_proc_channels_ == 1) {
return split_data_const(0, kBand0To8kHz);
return split_bands_const(0)[kBand0To8kHz];
}
if (!mixed_low_pass_valid_) {
@ -300,8 +332,8 @@ const int16_t* AudioBuffer::mixed_low_pass_data() {
mixed_low_pass_channels_.reset(
new ChannelBuffer<int16_t>(samples_per_split_channel_, 1));
}
StereoToMono(split_data_const(0, kBand0To8kHz),
split_data_const(1, kBand0To8kHz),
StereoToMono(split_bands_const(0)[kBand0To8kHz],
split_bands_const(1)[kBand0To8kHz],
mixed_low_pass_channels_->data(),
samples_per_split_channel_);
mixed_low_pass_valid_ = true;
@ -346,6 +378,10 @@ int AudioBuffer::samples_per_keyboard_channel() const {
return input_samples_per_channel_;
}
int AudioBuffer::num_bands() const {
return num_bands_;
}
// TODO(andrew): Do deinterleaving and mixing in one step?
void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
assert(proc_samples_per_channel_ == input_samples_per_channel_);
@ -404,7 +440,7 @@ void AudioBuffer::CopyLowPassToReference() {
num_proc_channels_));
}
for (int i = 0; i < num_proc_channels_; i++) {
low_pass_reference_channels_->CopyFrom(split_data_const(i, kBand0To8kHz),
low_pass_reference_channels_->CopyFrom(split_bands_const(i)[kBand0To8kHz],
i);
}
}
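
The new split_bands accessors hand out a per-channel view into a flat, channel-major pointer table; a sketch of the layout and usage implied by the indexing above:

// bands_ holds kMaxNumBands (3) pointers per channel:
//   [ch0 b0, ch0 b1, ch0 b2, ch1 b0, ch1 b1, ch1 b2, ...]
// split_bands(ch) refreshes that channel's slots and returns
// &bands_[kMaxNumBands * ch]; slots past num_bands() hold NULL.
int16_t* const* bands = audio->split_bands(i);
int16_t* low = bands[kBand0To8kHz];   // was split_data(i, kBand0To8kHz)
int16_t* mid = bands[kBand8To16kHz];  // was split_data(i, kBand8To16kHz)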

webrtc/modules/audio_processing/audio_buffer.h

@ -27,6 +27,7 @@ namespace webrtc {
class PushSincResampler;
class IFChannelBuffer;
static const int kMaxNumBands = 3;
enum Band {
kBand0To8kHz = 0,
kBand8To16kHz = 1,
@ -47,6 +48,7 @@ class AudioBuffer {
int samples_per_channel() const;
int samples_per_split_channel() const;
int samples_per_keyboard_channel() const;
int num_bands() const;
// Sample array accessors. Channels are guaranteed to be stored contiguously
// in memory. Prefer to use the const variants of each accessor when
@ -55,8 +57,8 @@ class AudioBuffer {
const int16_t* data_const(int channel) const;
int16_t* const* channels();
const int16_t* const* channels_const() const;
int16_t* split_data(int channel, Band band);
const int16_t* split_data_const(int channel, Band band) const;
int16_t* const* split_bands(int channel);
const int16_t* const* split_bands_const(int channel) const;
int16_t* const* split_channels(Band band);
const int16_t* const* split_channels_const(Band band) const;
@ -71,8 +73,8 @@ class AudioBuffer {
const float* data_const_f(int channel) const;
float* const* channels_f();
const float* const* channels_const_f() const;
float* split_data_f(int channel, Band band);
const float* split_data_const_f(int channel, Band band) const;
float* const* split_bands_f(int channel);
const float* const* split_bands_const_f(int channel) const;
float* const* split_channels_f(Band band);
const float* const* split_channels_const_f(Band band) const;
@ -110,6 +112,7 @@ class AudioBuffer {
const int proc_samples_per_channel_;
const int num_proc_channels_;
const int output_samples_per_channel_;
int num_bands_;
int samples_per_split_channel_;
bool mixed_low_pass_valid_;
bool reference_copied_;
@ -118,6 +121,8 @@ class AudioBuffer {
const float* keyboard_data_;
scoped_ptr<IFChannelBuffer> channels_;
ScopedVector<IFChannelBuffer> split_channels_;
scoped_ptr<int16_t*[]> bands_;
scoped_ptr<float*[]> bands_f_;
scoped_ptr<SplittingFilter> splitting_filter_;
scoped_ptr<ChannelBuffer<int16_t> > mixed_low_pass_channels_;
scoped_ptr<ChannelBuffer<int16_t> > low_pass_reference_channels_;

webrtc/modules/audio_processing/echo_cancellation_impl.cc

@ -89,7 +89,7 @@ int EchoCancellationImpl::ProcessRenderAudio(const AudioBuffer* audio) {
Handle* my_handle = static_cast<Handle*>(handle(handle_index));
err = WebRtcAec_BufferFarend(
my_handle,
audio->split_data_const_f(j, kBand0To8kHz),
audio->split_bands_const_f(j)[kBand0To8kHz],
static_cast<int16_t>(audio->samples_per_split_channel()));
if (err != apm_->kNoError) {
@ -129,10 +129,10 @@ int EchoCancellationImpl::ProcessCaptureAudio(AudioBuffer* audio) {
Handle* my_handle = handle(handle_index);
err = WebRtcAec_Process(
my_handle,
audio->split_data_const_f(i, kBand0To8kHz),
audio->split_data_const_f(i, kBand8To16kHz),
audio->split_data_f(i, kBand0To8kHz),
audio->split_data_f(i, kBand8To16kHz),
audio->split_bands_const_f(i)[kBand0To8kHz],
audio->split_bands_const_f(i)[kBand8To16kHz],
audio->split_bands_f(i)[kBand0To8kHz],
audio->split_bands_f(i)[kBand8To16kHz],
static_cast<int16_t>(audio->samples_per_split_channel()),
apm_->stream_delay_ms(),
stream_drift_samples_);

webrtc/modules/audio_processing/echo_control_mobile_impl.cc

@ -95,7 +95,7 @@ int EchoControlMobileImpl::ProcessRenderAudio(const AudioBuffer* audio) {
Handle* my_handle = static_cast<Handle*>(handle(handle_index));
err = WebRtcAecm_BufferFarend(
my_handle,
audio->split_data_const(j, kBand0To8kHz),
audio->split_bands_const(j)[kBand0To8kHz],
static_cast<int16_t>(audio->samples_per_split_channel()));
if (err != apm_->kNoError) {
@ -129,7 +129,7 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio) {
// TODO(ajm): improve how this works, possibly inside AECM.
// This is kind of hacked up.
const int16_t* noisy = audio->low_pass_reference(i);
const int16_t* clean = audio->split_data_const(i, kBand0To8kHz);
const int16_t* clean = audio->split_bands_const(i)[kBand0To8kHz];
if (noisy == NULL) {
noisy = clean;
clean = NULL;
@ -140,7 +140,7 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio) {
my_handle,
noisy,
clean,
audio->split_data(i, kBand0To8kHz),
audio->split_bands(i)[kBand0To8kHz],
static_cast<int16_t>(audio->samples_per_split_channel()),
apm_->stream_delay_ms());

webrtc/modules/audio_processing/gain_control_impl.cc

@ -90,8 +90,8 @@ int GainControlImpl::AnalyzeCaptureAudio(AudioBuffer* audio) {
Handle* my_handle = static_cast<Handle*>(handle(i));
err = WebRtcAgc_AddMic(
my_handle,
audio->split_data(i, kBand0To8kHz),
audio->split_data(i, kBand8To16kHz),
audio->split_bands(i)[kBand0To8kHz],
audio->split_bands(i)[kBand8To16kHz],
static_cast<int16_t>(audio->samples_per_split_channel()));
if (err != apm_->kNoError) {
@ -106,8 +106,8 @@ int GainControlImpl::AnalyzeCaptureAudio(AudioBuffer* audio) {
err = WebRtcAgc_VirtualMic(
my_handle,
audio->split_data(i, kBand0To8kHz),
audio->split_data(i, kBand8To16kHz),
audio->split_bands(i)[kBand0To8kHz],
audio->split_bands(i)[kBand8To16kHz],
static_cast<int16_t>(audio->samples_per_split_channel()),
analog_capture_level_,
&capture_level_out);
@ -144,11 +144,11 @@ int GainControlImpl::ProcessCaptureAudio(AudioBuffer* audio) {
int err = WebRtcAgc_Process(
my_handle,
audio->split_data_const(i, kBand0To8kHz),
audio->split_data_const(i, kBand8To16kHz),
audio->split_bands_const(i)[kBand0To8kHz],
audio->split_bands_const(i)[kBand8To16kHz],
static_cast<int16_t>(audio->samples_per_split_channel()),
audio->split_data(i, kBand0To8kHz),
audio->split_data(i, kBand8To16kHz),
audio->split_bands(i)[kBand0To8kHz],
audio->split_bands(i)[kBand8To16kHz],
capture_levels_[i],
&capture_level_out,
apm_->echo_cancellation()->stream_has_echo(),

webrtc/modules/audio_processing/high_pass_filter_impl.cc

@ -123,7 +123,7 @@ int HighPassFilterImpl::ProcessCaptureAudio(AudioBuffer* audio) {
for (int i = 0; i < num_handles(); i++) {
Handle* my_handle = static_cast<Handle*>(handle(i));
err = Filter(my_handle,
audio->split_data(i, kBand0To8kHz),
audio->split_bands(i)[kBand0To8kHz],
audio->samples_per_split_channel());
if (err != apm_->kNoError) {

webrtc/modules/audio_processing/noise_suppression_impl.cc

@ -66,19 +66,13 @@ int NoiseSuppressionImpl::AnalyzeCaptureAudio(AudioBuffer* audio) {
for (int i = 0; i < num_handles(); ++i) {
Handle* my_handle = static_cast<Handle*>(handle(i));
int err = WebRtcNs_Analyze(my_handle,
audio->split_data_f(i, kBand0To8kHz));
if (err != apm_->kNoError) {
return GetHandleError(my_handle);
}
WebRtcNs_Analyze(my_handle, audio->split_bands_const_f(i)[kBand0To8kHz]);
}
#endif
return apm_->kNoError;
}
int NoiseSuppressionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
int err = apm_->kNoError;
if (!is_component_enabled()) {
return apm_->kNoError;
}
@ -88,24 +82,17 @@ int NoiseSuppressionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
for (int i = 0; i < num_handles(); ++i) {
Handle* my_handle = static_cast<Handle*>(handle(i));
#if defined(WEBRTC_NS_FLOAT)
err = WebRtcNs_Process(my_handle,
audio->split_data_f(i, kBand0To8kHz),
audio->split_data_f(i, kBand8To16kHz),
audio->split_data_f(i, kBand0To8kHz),
audio->split_data_f(i, kBand8To16kHz));
WebRtcNs_Process(my_handle,
audio->split_bands_const_f(i),
audio->num_bands(),
audio->split_bands_f(i));
#elif defined(WEBRTC_NS_FIXED)
err = WebRtcNsx_Process(my_handle,
audio->split_data(i, kBand0To8kHz),
audio->split_data(i, kBand8To16kHz),
audio->split_data(i, kBand0To8kHz),
audio->split_data(i, kBand8To16kHz));
WebRtcNsx_Process(my_handle,
audio->split_bands_const(i),
audio->num_bands(),
audio->split_bands(i));
#endif
if (err != apm_->kNoError) {
return GetHandleError(my_handle);
}
}
return apm_->kNoError;
}
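
Worth noting in the hunks above: WebRtcNs_Analyze still runs on the lowest band only, while WebRtcNs_Process now receives every band, so the capture path per channel reduces to roughly (a condensed sketch, error handling elided):

WebRtcNs_Analyze(my_handle, audio->split_bands_const_f(i)[kBand0To8kHz]);
WebRtcNs_Process(my_handle,
                 audio->split_bands_const_f(i),  // pointers to all bands
                 audio->num_bands(),             // 3 at 48 kHz, 2 at 32 kHz
                 audio->split_bands_f(i));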

webrtc/modules/audio_processing/ns/defines.h

@ -14,6 +14,7 @@
#define BLOCKL_MAX 160 // max processing block length: 160
#define ANAL_BLOCKL_MAX 256 // max analysis block length: 256
#define HALF_ANAL_BLOCKL 129 // half max analysis block length + 1
#define NUM_HIGH_BANDS_MAX 2 // max number of high bands: 2
#define QUANTILE (float)0.25

webrtc/modules/audio_processing/ns/include/noise_suppression.h

@ -89,11 +89,8 @@ int WebRtcNs_set_policy(NsHandle* NS_inst, int mode);
*
* Output:
* - NS_inst : Updated NS instance
*
* Return value : 0 - OK
* -1 - Error
*/
int WebRtcNs_Analyze(NsHandle* NS_inst, float* spframe);
void WebRtcNs_Analyze(NsHandle* NS_inst, const float* spframe);
/*
* This functions does Noise Suppression for the inserted speech frame. The
@ -101,23 +98,17 @@ int WebRtcNs_Analyze(NsHandle* NS_inst, float* spframe);
*
* Input
* - NS_inst : Noise suppression instance.
* - spframe : Pointer to speech frame buffer for L band
* - spframe_H : Pointer to speech frame buffer for H band
* - fs : sampling frequency
* - spframe : Pointer to speech frame buffer for each band
* - num_bands : Number of bands
*
* Output:
* - NS_inst : Updated NS instance
* - outframe : Pointer to output frame for L band
* - outframe_H : Pointer to output frame for H band
*
* Return value : 0 - OK
* -1 - Error
* - outframe : Pointer to output frame for each band
*/
int WebRtcNs_Process(NsHandle* NS_inst,
float* spframe,
float* spframe_H,
float* outframe,
float* outframe_H);
void WebRtcNs_Process(NsHandle* NS_inst,
const float* const* spframe,
int num_bands,
float* const* outframe);
/* Returns the internally used prior speech probability of the current frame.
* There is a frequency bin based one as well, with which this should not be

webrtc/modules/audio_processing/ns/include/noise_suppression_x.h

@ -84,23 +84,17 @@ int WebRtcNsx_set_policy(NsxHandle* nsxInst, int mode);
*
* Input
* - nsxInst : NSx instance. Needs to be initiated before call.
* - speechFrame : Pointer to speech frame buffer for L band
* - speechFrameHB : Pointer to speech frame buffer for H band
* - fs : sampling frequency
* - speechFrame : Pointer to speech frame buffer for each band
* - num_bands : Number of bands
*
* Output:
* - nsxInst : Updated NSx instance
* - outFrame : Pointer to output frame for L band
* - outFrameHB : Pointer to output frame for H band
*
* Return value : 0 - OK
* -1 - Error
* - outFrame : Pointer to output frame for each band
*/
int WebRtcNsx_Process(NsxHandle* nsxInst,
short* speechFrame,
short* speechFrameHB,
short* outFrame,
short* outFrameHB);
void WebRtcNsx_Process(NsxHandle* nsxInst,
const short* const* speechFrame,
int num_bands,
short* const* outFrame);
#ifdef __cplusplus
}

webrtc/modules/audio_processing/ns/noise_suppression.c

@ -42,14 +42,15 @@ int WebRtcNs_set_policy(NsHandle* NS_inst, int mode) {
return WebRtcNs_set_policy_core((NSinst_t*) NS_inst, mode);
}
int WebRtcNs_Analyze(NsHandle* NS_inst, float* spframe) {
return WebRtcNs_AnalyzeCore((NSinst_t*) NS_inst, spframe);
void WebRtcNs_Analyze(NsHandle* NS_inst, const float* spframe) {
WebRtcNs_AnalyzeCore((NSinst_t*) NS_inst, spframe);
}
int WebRtcNs_Process(NsHandle* NS_inst, float* spframe, float* spframe_H,
float* outframe, float* outframe_H) {
return WebRtcNs_ProcessCore(
(NSinst_t*) NS_inst, spframe, spframe_H, outframe, outframe_H);
void WebRtcNs_Process(NsHandle* NS_inst,
const float* const* spframe,
int num_bands,
float* const* outframe) {
WebRtcNs_ProcessCore((NSinst_t*)NS_inst, spframe, num_bands, outframe);
}
float WebRtcNs_prior_speech_probability(NsHandle* handle) {

webrtc/modules/audio_processing/ns/noise_suppression_x.c

@ -45,9 +45,9 @@ int WebRtcNsx_set_policy(NsxHandle* nsxInst, int mode) {
return WebRtcNsx_set_policy_core((NsxInst_t*)nsxInst, mode);
}
int WebRtcNsx_Process(NsxHandle* nsxInst, short* speechFrame,
short* speechFrameHB, short* outFrame,
short* outFrameHB) {
return WebRtcNsx_ProcessCore(
(NsxInst_t*)nsxInst, speechFrame, speechFrameHB, outFrame, outFrameHB);
void WebRtcNsx_Process(NsxHandle* nsxInst,
const short* const* speechFrame,
int num_bands,
short* const* outFrame) {
WebRtcNsx_ProcessCore((NsxInst_t*)nsxInst, speechFrame, num_bands, outFrame);
}

webrtc/modules/audio_processing/ns/ns_core.c

@ -79,24 +79,18 @@ int WebRtcNs_InitCore(NSinst_t* self, uint32_t fs) {
}
// Initialization of struct.
if (fs == 8000 || fs == 16000 || fs == 32000) {
if (fs == 8000 || fs == 16000 || fs == 32000 || fs == 48000) {
self->fs = fs;
} else {
return -1;
}
self->windShift = 0;
// We only support 10ms frames.
if (fs == 8000) {
// We only support 10ms frames.
self->blockLen = 80;
self->anaLen = 128;
self->window = kBlocks80w128;
} else if (fs == 16000) {
// We only support 10ms frames.
self->blockLen = 160;
self->anaLen = 256;
self->window = kBlocks160w256;
} else if (fs == 32000) {
// We only support 10ms frames.
} else {
self->blockLen = 160;
self->anaLen = 256;
self->window = kBlocks160w256;
@ -113,7 +107,9 @@ int WebRtcNs_InitCore(NSinst_t* self, uint32_t fs) {
memset(self->syntBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);
// For HB processing.
memset(self->dataBufHB, 0, sizeof(float) * ANAL_BLOCKL_MAX);
memset(self->dataBufHB,
0,
sizeof(float) * NUM_HIGH_BANDS_MAX * ANAL_BLOCKL_MAX);
// For quantile noise estimation.
memset(self->quantile, 0, sizeof(float) * HALF_ANAL_BLOCKL);
@ -1041,7 +1037,7 @@ int WebRtcNs_set_policy_core(NSinst_t* self, int mode) {
return 0;
}
int WebRtcNs_AnalyzeCore(NSinst_t* self, float* speechFrame) {
void WebRtcNs_AnalyzeCore(NSinst_t* self, const float* speechFrame) {
int i;
const int kStartBand = 5; // Skip first frequency bins during estimation.
int updateParsFlag;
@ -1062,9 +1058,7 @@ int WebRtcNs_AnalyzeCore(NSinst_t* self, float* speechFrame) {
float parametric_num = 0.0;
// Check that initiation has been done.
if (self->initFlag != 1) {
return (-1);
}
assert(self->initFlag == 1);
updateParsFlag = self->modelUpdatePars[0];
// Update analysis buffer for L band.
@ -1081,7 +1075,7 @@ int WebRtcNs_AnalyzeCore(NSinst_t* self, float* speechFrame) {
// Depending on the duration of the inactive signal it takes a
// considerable amount of time for the system to learn what is noise and
// what is speech.
return 0;
return;
}
self->blockInd++; // Update the block index only when we process a block.
@ -1181,18 +1175,15 @@ int WebRtcNs_AnalyzeCore(NSinst_t* self, float* speechFrame) {
// Keep track of noise spectrum for next frame.
memcpy(self->noise, noise, sizeof(*noise) * self->magnLen);
memcpy(self->magnPrevAnalyze, magn, sizeof(*magn) * self->magnLen);
return 0;
}
int WebRtcNs_ProcessCore(NSinst_t* self,
float* speechFrame,
float* speechFrameHB,
float* outFrame,
float* outFrameHB) {
void WebRtcNs_ProcessCore(NSinst_t* self,
const float* const* speechFrame,
int num_bands,
float* const* outFrame) {
// Main routine for noise reduction.
int flagHB = 0;
int i;
int i, j;
float energy1, energy2, gain, factor, factor1, factor2;
float fout[BLOCKL_MAX];
@ -1211,14 +1202,16 @@ int WebRtcNs_ProcessCore(NSinst_t* self,
float sumMagnAnalyze, sumMagnProcess;
// Check that initiation has been done.
if (self->initFlag != 1) {
return (-1);
}
// Check for valid pointers based on sampling rate.
if (self->fs == 32000) {
if (speechFrameHB == NULL) {
return -1;
}
assert(self->initFlag == 1);
assert((num_bands - 1) <= NUM_HIGH_BANDS_MAX);
const float* const* speechFrameHB = NULL;
float* const* outFrameHB = NULL;
int num_high_bands = 0;
if (num_bands > 1) {
speechFrameHB = &speechFrame[1];
outFrameHB = &outFrame[1];
num_high_bands = num_bands - 1;
flagHB = 1;
// Range for averaging low band quantities for H band gain.
deltaBweHB = (int)self->magnLen / 4;
@ -1226,11 +1219,16 @@ int WebRtcNs_ProcessCore(NSinst_t* self,
}
// Update analysis buffer for L band.
UpdateBuffer(speechFrame, self->blockLen, self->anaLen, self->dataBuf);
UpdateBuffer(speechFrame[0], self->blockLen, self->anaLen, self->dataBuf);
if (flagHB == 1) {
// Update analysis buffer for H band.
UpdateBuffer(speechFrameHB, self->blockLen, self->anaLen, self->dataBufHB);
// Update analysis buffer for H bands.
for (i = 0; i < num_high_bands; ++i) {
UpdateBuffer(speechFrameHB[i],
self->blockLen,
self->anaLen,
self->dataBufHB[i]);
}
}
Windowing(self->window, self->dataBuf, self->anaLen, winData);
@ -1245,16 +1243,21 @@ int WebRtcNs_ProcessCore(NSinst_t* self,
UpdateBuffer(NULL, self->blockLen, self->anaLen, self->syntBuf);
for (i = 0; i < self->blockLen; ++i)
outFrame[i] =
outFrame[0][i] =
WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fout[i], WEBRTC_SPL_WORD16_MIN);
// For time-domain gain of HB.
if (flagHB == 1)
for (i = 0; i < self->blockLen; ++i)
outFrameHB[i] = WEBRTC_SPL_SAT(
WEBRTC_SPL_WORD16_MAX, self->dataBufHB[i], WEBRTC_SPL_WORD16_MIN);
if (flagHB == 1) {
for (i = 0; i < num_high_bands; ++i) {
for (j = 0; j < self->blockLen; ++j) {
outFrameHB[i][j] = WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
self->dataBufHB[i][j],
WEBRTC_SPL_WORD16_MIN);
}
}
}
return 0;
return;
}
FFT(self, winData, self->anaLen, self->magnLen, real, imag, magn);
@ -1349,7 +1352,7 @@ int WebRtcNs_ProcessCore(NSinst_t* self,
UpdateBuffer(NULL, self->blockLen, self->anaLen, self->syntBuf);
for (i = 0; i < self->blockLen; ++i)
outFrame[i] =
outFrame[0][i] =
WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fout[i], WEBRTC_SPL_WORD16_MIN);
// For time-domain gain of HB.
@ -1397,12 +1400,13 @@ int WebRtcNs_ProcessCore(NSinst_t* self,
gainTimeDomainHB = 1.f;
}
// Apply gain.
for (i = 0; i < self->blockLen; i++) {
float o = gainTimeDomainHB * self->dataBufHB[i];
outFrameHB[i] =
WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, o, WEBRTC_SPL_WORD16_MIN);
for (i = 0; i < num_high_bands; ++i) {
for (j = 0; j < self->blockLen; j++) {
outFrameHB[i][j] =
WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
gainTimeDomainHB * self->dataBufHB[i][j],
WEBRTC_SPL_WORD16_MIN);
}
}
} // End of H band gain computation.
return 0;
}

webrtc/modules/audio_processing/ns/ns_core.h

@ -108,7 +108,8 @@ typedef struct NSinst_t_ {
int histSpecDiff[HIST_PAR_EST];
// Quantities for high band estimate.
float speechProb[HALF_ANAL_BLOCKL]; // Final speech/noise prob: prior + LRT.
float dataBufHB[ANAL_BLOCKL_MAX]; // Buffering data for HB.
// Buffering data for HB.
float dataBufHB[NUM_HIGH_BANDS_MAX][ANAL_BLOCKL_MAX];
} NSinst_t;
@ -161,11 +162,8 @@ int WebRtcNs_set_policy_core(NSinst_t* self, int mode);
*
* Output:
* - self : Updated instance
*
* Return value : 0 - OK
* -1 - Error
*/
int WebRtcNs_AnalyzeCore(NSinst_t* self, float* speechFrame);
void WebRtcNs_AnalyzeCore(NSinst_t* self, const float* speechFrame);
/****************************************************************************
* WebRtcNs_ProcessCore
@ -174,22 +172,17 @@ int WebRtcNs_AnalyzeCore(NSinst_t* self, float* speechFrame);
*
* Input:
* - self : Instance that should be initialized
* - inFrameLow : Input speech frame for lower band
* - inFrameHigh : Input speech frame for higher band
* - inFrame : Input speech frame for each band
* - num_bands : Number of bands
*
* Output:
* - self : Updated instance
* - outFrameLow : Output speech frame for lower band
* - outFrameHigh : Output speech frame for higher band
*
* Return value : 0 - OK
* -1 - Error
* - outFrame : Output speech frame for each band
*/
int WebRtcNs_ProcessCore(NSinst_t* self,
float* inFrameLow,
float* inFrameHigh,
float* outFrameLow,
float* outFrameHigh);
void WebRtcNs_ProcessCore(NSinst_t* self,
const float* const* inFrame,
int num_bands,
float* const* outFrame);
#ifdef __cplusplus
}

webrtc/modules/audio_processing/ns/nsx_core.c

@ -637,7 +637,7 @@ int32_t WebRtcNsx_InitCore(NsxInst_t* inst, uint32_t fs) {
//
// Initialization of struct
if (fs == 8000 || fs == 16000 || fs == 32000) {
if (fs == 8000 || fs == 16000 || fs == 32000 || fs == 48000) {
inst->fs = fs;
} else {
return -1;
@ -651,15 +651,7 @@ int32_t WebRtcNsx_InitCore(NsxInst_t* inst, uint32_t fs) {
inst->thresholdLogLrt = 131072; //default threshold for LRT feature
inst->maxLrt = 0x0040000;
inst->minLrt = 52429;
} else if (fs == 16000) {
inst->blockLen10ms = 160;
inst->anaLen = 256;
inst->stages = 8;
inst->window = kBlocks160w256x;
inst->thresholdLogLrt = 212644; //default threshold for LRT feature
inst->maxLrt = 0x0080000;
inst->minLrt = 104858;
} else if (fs == 32000) {
} else {
inst->blockLen10ms = 160;
inst->anaLen = 256;
inst->stages = 8;
@ -683,7 +675,8 @@ int32_t WebRtcNsx_InitCore(NsxInst_t* inst, uint32_t fs) {
WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer, ANAL_BLOCKL_MAX);
// for HB processing
WebRtcSpl_ZerosArrayW16(inst->dataBufHBFX, ANAL_BLOCKL_MAX);
WebRtcSpl_ZerosArrayW16(inst->dataBufHBFX[0],
NUM_HIGH_BANDS_MAX * ANAL_BLOCKL_MAX);
// for quantile noise estimation
WebRtcSpl_ZerosArrayW16(inst->noiseEstQuantile, HALF_ANAL_BLOCKL);
for (i = 0; i < SIMULT * HALF_ANAL_BLOCKL; i++) {
@ -1502,8 +1495,10 @@ void WebRtcNsx_DataSynthesis(NsxInst_t* inst, short* outFrame) {
WebRtcNsx_SynthesisUpdate(inst, outFrame, gainFactor);
}
int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFrameHB,
short* outFrame, short* outFrameHB) {
void WebRtcNsx_ProcessCore(NsxInst_t* inst,
const short* const* speechFrame,
int num_bands,
short* const* outFrame) {
// main routine for noise suppression
uint32_t tmpU32no1, tmpU32no2, tmpU32no3;
@ -1535,7 +1530,7 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram
int16_t avgProbSpeechHB, gainModHB, avgFilterGainHB, gainTimeDomainHB;
int16_t pink_noise_exp_avg = 0;
int i;
int i, j;
int nShifts, postShifts;
int norm32no1, norm32no2;
int flag, sign;
@ -1553,37 +1548,46 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram
#ifdef NS_FILEDEBUG
if (fwrite(spframe, sizeof(short),
inst->blockLen10ms, inst->infile) != inst->blockLen10ms) {
return -1;
assert(false);
}
#endif
// Check that initialization has been done
if (inst->initFlag != 1) {
return -1;
}
// Check for valid pointers based on sampling rate
if ((inst->fs == 32000) && (speechFrameHB == NULL)) {
return -1;
assert(inst->initFlag == 1);
assert((num_bands - 1) <= NUM_HIGH_BANDS_MAX);
const short* const* speechFrameHB = NULL;
short* const* outFrameHB = NULL;
int num_high_bands = 0;
if (num_bands > 1) {
speechFrameHB = &speechFrame[1];
outFrameHB = &outFrame[1];
num_high_bands = num_bands - 1;
}
// Store speechFrame and transform to frequency domain
WebRtcNsx_DataAnalysis(inst, speechFrame, magnU16);
WebRtcNsx_DataAnalysis(inst, (short*)speechFrame[0], magnU16);
if (inst->zeroInputSignal) {
WebRtcNsx_DataSynthesis(inst, outFrame);
WebRtcNsx_DataSynthesis(inst, outFrame[0]);
if (inst->fs == 32000) {
if (num_bands > 1) {
// update analysis buffer for H band
// append new data to buffer FX
WEBRTC_SPL_MEMCPY_W16(inst->dataBufHBFX, inst->dataBufHBFX + inst->blockLen10ms,
inst->anaLen - inst->blockLen10ms);
WEBRTC_SPL_MEMCPY_W16(inst->dataBufHBFX + inst->anaLen - inst->blockLen10ms,
speechFrameHB, inst->blockLen10ms);
for (i = 0; i < inst->blockLen10ms; i++) {
outFrameHB[i] = inst->dataBufHBFX[i]; // Q0
for (i = 0; i < num_high_bands; ++i) {
WEBRTC_SPL_MEMCPY_W16(inst->dataBufHBFX[i],
inst->dataBufHBFX[i] + inst->blockLen10ms,
inst->anaLen - inst->blockLen10ms);
WEBRTC_SPL_MEMCPY_W16(
inst->dataBufHBFX[i] + inst->anaLen - inst->blockLen10ms,
speechFrameHB[i],
inst->blockLen10ms);
for (j = 0; j < inst->blockLen10ms; j++) {
outFrameHB[i][j] = inst->dataBufHBFX[i][j]; // Q0
}
}
} // end of H band gain computation
return 0;
return;
}
// Update block index when we have something to process
@ -2022,21 +2026,28 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram
}
}
WebRtcNsx_DataSynthesis(inst, outFrame);
WebRtcNsx_DataSynthesis(inst, outFrame[0]);
#ifdef NS_FILEDEBUG
if (fwrite(outframe, sizeof(short),
inst->blockLen10ms, inst->outfile) != inst->blockLen10ms) {
return -1;
assert(false);
}
#endif
// For the H bands: only update the data buffer here; the time-domain
// gain derived from the L band is applied afterwards.
if (inst->fs == 32000) {
if (num_bands > 1) {
// update analysis buffer for H band
// append new data to buffer FX
WEBRTC_SPL_MEMCPY_W16(inst->dataBufHBFX, inst->dataBufHBFX + inst->blockLen10ms, inst->anaLen - inst->blockLen10ms);
WEBRTC_SPL_MEMCPY_W16(inst->dataBufHBFX + inst->anaLen - inst->blockLen10ms, speechFrameHB, inst->blockLen10ms);
for (i = 0; i < num_high_bands; ++i) {
WEBRTC_SPL_MEMCPY_W16(inst->dataBufHBFX[i],
inst->dataBufHBFX[i] + inst->blockLen10ms,
inst->anaLen - inst->blockLen10ms);
WEBRTC_SPL_MEMCPY_W16(
inst->dataBufHBFX[i] + inst->anaLen - inst->blockLen10ms,
speechFrameHB[i],
inst->blockLen10ms);
}
// range for averaging low band quantities for H band gain
gainTimeDomainHB = 16384; // 16384 = Q14(1.0)
@ -2094,11 +2105,13 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram
//apply gain
for (i = 0; i < inst->blockLen10ms; i++) {
outFrameHB[i]
= (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(gainTimeDomainHB, inst->dataBufHBFX[i], 14); // Q0
for (i = 0; i < num_high_bands; ++i) {
for (j = 0; j < inst->blockLen10ms; j++) {
outFrameHB[i][j] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(
gainTimeDomainHB,
inst->dataBufHBFX[i][j],
14); // Q0
}
}
} // end of H band gain computation
return 0;
}
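
The fixed-point path applies the same low-band-derived gain to each high band in Q14; WEBRTC_SPL_MUL_16_16_RSFT(gain, sample, 14) amounts to the following (a sketch of the arithmetic, not the macro's literal definition):

// Multiply by a Q14 gain (16384 represents 1.0), then shift back to Q0.
static int16_t ApplyQ14Gain(int16_t gain_q14, int16_t sample) {
  return (int16_t)(((int32_t)gain_q14 * (int32_t)sample) >> 14);
}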

webrtc/modules/audio_processing/ns/nsx_core.h

@ -93,7 +93,7 @@ typedef struct NsxInst_t_ {
int16_t histSpecDiff[HIST_PAR_EST];
// Quantities for high band estimate.
int16_t dataBufHBFX[ANAL_BLOCKL_MAX]; // Q0
int16_t dataBufHBFX[NUM_HIGH_BANDS_MAX][ANAL_BLOCKL_MAX];
int qNoise;
int prevQNoise;
@ -155,25 +155,20 @@ int WebRtcNsx_set_policy_core(NsxInst_t* inst, int mode);
*
* Input:
* - inst : Instance that should be initialized
* - inFrameLow : Input speech frame for lower band
* - inFrameHigh : Input speech frame for higher band
* - inFrame : Input speech frame for each band
* - num_bands : Number of bands
*
* Output:
* - inst : Updated instance
* - outFrameLow : Output speech frame for lower band
* - outFrameHigh : Output speech frame for higher band
*
* Return value : 0 - OK
* -1 - Error
* - outFrame : Output speech frame for each band
*/
int WebRtcNsx_ProcessCore(NsxInst_t* inst,
short* inFrameLow,
short* inFrameHigh,
short* outFrameLow,
short* outFrameHigh);
void WebRtcNsx_ProcessCore(NsxInst_t* inst,
const short* const* inFrame,
int num_bands,
short* const* outFrame);
/****************************************************************************
* Some function pointers, for internal functions shared by ARM NEON and
* generic C code.
*/
// Noise Estimation.

webrtc/modules/audio_processing/ns/nsx_defines.h

@ -13,6 +13,7 @@
#define ANAL_BLOCKL_MAX 256 /* Max analysis block length */
#define HALF_ANAL_BLOCKL 129 /* Half max analysis block length + 1 */
#define NUM_HIGH_BANDS_MAX 2 /* Max number of high bands */
#define SIMULT 3
#define END_STARTUP_LONG 200
#define END_STARTUP_SHORT 50