Add 48kHz support to Beamformer

The 16-24kHz band is now handled the same way the 8-16kHz band already is.
Tested at a 32kHz sample rate; the output is bit-exact with the output before this CL.
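
For reference, a standalone sketch of the idea (not code from this CL; NumBandsForRate and ApplyMaskToUpperBands are made-up names): the smoothed post-filter mask is applied by delay-and-sum to every split band above the lowest, so 48kHz just adds one more band to the loop, while 32kHz keeps exactly one upper band and therefore stays bit-exact.

// Standalone sketch; NumBandsForRate and ApplyMaskToUpperBands are made-up
// names, not WebRTC APIs.
//
// Number of 16kHz-wide split bands per sample rate: 1 up to 16kHz,
// 2 at 32kHz (0-8, 8-16), 3 at 48kHz (0-8, 8-16, 16-24).
int NumBandsForRate(int sample_rate_hz) {
  return sample_rate_hz <= 16000 ? 1 : sample_rate_hz / 16000;
}

// Delay-and-sum (average across channels) plus a smoothly ramped gain mask,
// applied identically to every band above the lowest. Assumed layout:
// in[band][channel][frame], out[band][channel][frame]; the result is written
// to channel 0 of each upper band.
void ApplyMaskToUpperBands(const float* const* const* in,
                           float* const* const* out,
                           int num_bands,
                           int num_channels,
                           int num_frames,
                           float old_mask,
                           float new_mask) {
  const float ramp_increment = (new_mask - old_mask) / num_frames;
  for (int band = 1; band < num_bands; ++band) {
    float smoothed_mask = old_mask;
    for (int frame = 0; frame < num_frames; ++frame) {
      smoothed_mask += ramp_increment;
      float sum = 0.f;
      for (int ch = 0; ch < num_channels; ++ch) {
        sum += in[band][ch][frame];
      }
      out[band][0][frame] = sum / num_channels * smoothed_mask;
    }
  }
}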

BUG=webrtc:3146
R=andrew@webrtc.org, kwiberg@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/35159004

Cr-Commit-Position: refs/heads/master@{#8522}
git-svn-id: http://webrtc.googlecode.com/svn/trunk@8522 4adac7df-926f-26a2-2b94-8c16560cd09d
Author: aluebs@webrtc.org
Date: 2015-02-26 21:52:20 +00:00
Commit: 3aca0b0b31 (parent: 9650ab4d59)
7 changed files with 104 additions and 62 deletions


@@ -244,6 +244,24 @@ int16_t* const* AudioBuffer::split_channels(Band band) {
   }
 }
 
+ChannelBuffer<int16_t>* AudioBuffer::data() {
+  mixed_low_pass_valid_ = false;
+  return data_->ibuf();
+}
+
+const ChannelBuffer<int16_t>* AudioBuffer::data() const {
+  return data_->ibuf_const();
+}
+
+ChannelBuffer<int16_t>* AudioBuffer::split_data() {
+  mixed_low_pass_valid_ = false;
+  return split_data_.get() ? split_data_->ibuf() : data_->ibuf();
+}
+
+const ChannelBuffer<int16_t>* AudioBuffer::split_data() const {
+  return split_data_.get() ? split_data_->ibuf_const() : data_->ibuf_const();
+}
+
 const float* const* AudioBuffer::channels_const_f() const {
   return data_->fbuf_const()->channels();
 }
@@ -283,6 +301,24 @@ float* const* AudioBuffer::split_channels_f(Band band) {
   }
 }
 
+ChannelBuffer<float>* AudioBuffer::data_f() {
+  mixed_low_pass_valid_ = false;
+  return data_->fbuf();
+}
+
+const ChannelBuffer<float>* AudioBuffer::data_f() const {
+  return data_->fbuf_const();
+}
+
+ChannelBuffer<float>* AudioBuffer::split_data_f() {
+  mixed_low_pass_valid_ = false;
+  return split_data_.get() ? split_data_->fbuf() : data_->fbuf();
+}
+
+const ChannelBuffer<float>* AudioBuffer::split_data_f() const {
+  return split_data_.get() ? split_data_->fbuf_const() : data_->fbuf_const();
+}
+
 const int16_t* AudioBuffer::mixed_low_pass_data() {
   // Currently only mixing stereo to mono is supported.
   assert(num_proc_channels_ == 1 || num_proc_channels_ == 2);


@@ -50,30 +50,59 @@ class AudioBuffer {
   int num_keyboard_frames() const;
   int num_bands() const;
 
-  // Sample array accessors. Channels are guaranteed to be stored contiguously
-  // in memory. Prefer to use the const variants of each accessor when
-  // possible, since they incur less float<->int16 conversion overhead.
+  // Returns a pointer array to the full-band channels.
+  // Usage:
+  // channels()[channel][sample].
+  // Where:
+  // 0 <= channel < |num_proc_channels_|
+  // 0 <= sample < |proc_num_frames_|
   int16_t* const* channels();
   const int16_t* const* channels_const() const;
+  float* const* channels_f();
+  const float* const* channels_const_f() const;
+
+  // Returns a pointer array to the bands for a specific channel.
+  // Usage:
+  // split_bands(channel)[band][sample].
+  // Where:
+  // 0 <= channel < |num_proc_channels_|
+  // 0 <= band < |num_bands_|
+  // 0 <= sample < |num_split_frames_|
   int16_t* const* split_bands(int channel);
   const int16_t* const* split_bands_const(int channel) const;
+  float* const* split_bands_f(int channel);
+  const float* const* split_bands_const_f(int channel) const;
+
+  // Returns a pointer array to the channels for a specific band.
+  // Usage:
+  // split_channels(band)[channel][sample].
+  // Where:
+  // 0 <= band < |num_bands_|
+  // 0 <= channel < |num_proc_channels_|
+  // 0 <= sample < |num_split_frames_|
   int16_t* const* split_channels(Band band);
   const int16_t* const* split_channels_const(Band band) const;
+  float* const* split_channels_f(Band band);
+  const float* const* split_channels_const_f(Band band) const;
+
+  // Returns a pointer to the ChannelBuffer that encapsulates the full-band
+  // data.
+  ChannelBuffer<int16_t>* data();
+  const ChannelBuffer<int16_t>* data() const;
+  ChannelBuffer<float>* data_f();
+  const ChannelBuffer<float>* data_f() const;
+
+  // Returns a pointer to the ChannelBuffer that encapsulates the split data.
+  ChannelBuffer<int16_t>* split_data();
+  const ChannelBuffer<int16_t>* split_data() const;
+  ChannelBuffer<float>* split_data_f();
+  const ChannelBuffer<float>* split_data_f() const;
 
   // Returns a pointer to the low-pass data downmixed to mono. If this data
   // isn't already available it re-calculates it.
   const int16_t* mixed_low_pass_data();
   const int16_t* low_pass_reference(int channel) const;
 
-  // Float versions of the accessors, with automatic conversion back and forth
-  // as necessary. The range of the numbers are the same as for int16_t.
-  float* const* channels_f();
-  const float* const* channels_const_f() const;
-  float* const* split_bands_f(int channel);
-  const float* const* split_bands_const_f(int channel) const;
-  float* const* split_channels_f(Band band);
-  const float* const* split_channels_const_f(Band band) const;
-
   const float* keyboard_data() const;
 
   void set_activity(AudioFrame::VADActivity activity);
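
A hedged usage sketch of the accessors documented above (not part of this CL). It assumes the include path, that the buffer was configured for 48kHz input so num_bands() == 3, and that the Band enum gains a kBand16To24kHz value for the new band:

// Usage sketch only; the include path is assumed, and kBand16To24kHz is
// assumed to exist alongside kBand0To8kHz and kBand8To16kHz.
#include "webrtc/modules/audio_processing/audio_buffer.h"

namespace webrtc {

// Zeroes the highest split band of every channel, using the
// split_channels_f(band)[channel][sample] layout documented above.
void MuteHighestBand(AudioBuffer* audio) {
  if (audio->num_bands() < 3)
    return;  // The 16-24kHz band only exists for 48kHz streams.
  float* const* high_band = audio->split_channels_f(kBand16To24kHz);
  for (int ch = 0; ch < audio->num_channels(); ++ch) {
    for (int i = 0; i < audio->num_frames_per_band(); ++i) {
      high_band[ch][i] = 0.f;
    }
  }
}

}  // namespace webrtc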


@@ -610,12 +610,7 @@ int AudioProcessingImpl::ProcessStreamLocked() {
 
 #ifdef WEBRTC_BEAMFORMER
   if (beamformer_enabled_) {
-    beamformer_->ProcessChunk(ca->split_channels_const_f(kBand0To8kHz),
-                              ca->split_channels_const_f(kBand8To16kHz),
-                              ca->num_channels(),
-                              ca->num_frames_per_band(),
-                              ca->split_channels_f(kBand0To8kHz),
-                              ca->split_channels_f(kBand8To16kHz));
+    beamformer_->ProcessChunk(ca->split_data_f(), ca->split_data_f());
     ca->set_num_channels(1);
   }
 #endif


@@ -295,36 +295,32 @@ void Beamformer::InitInterfCovMats() {
   }
 }
 
-void Beamformer::ProcessChunk(const float* const* input,
-                              const float* const* high_pass_split_input,
-                              int num_input_channels,
-                              int num_frames_per_band,
-                              float* const* output,
-                              float* const* high_pass_split_output) {
-  CHECK_EQ(num_input_channels, num_input_channels_);
-  CHECK_EQ(num_frames_per_band, chunk_length_);
+void Beamformer::ProcessChunk(const ChannelBuffer<float>* input,
+                              ChannelBuffer<float>* output) {
+  DCHECK_EQ(input->num_channels(), num_input_channels_);
+  DCHECK_EQ(input->num_frames_per_band(), chunk_length_);
 
   float old_high_pass_mask = high_pass_postfilter_mask_;
-  lapped_transform_->ProcessChunk(input, output);
-
+  lapped_transform_->ProcessChunk(input->channels(0), output->channels(0));
+  // Ramp up/down for smoothing. 1 mask per 10ms results in audible
+  // discontinuities.
+  const float ramp_increment =
+      (high_pass_postfilter_mask_ - old_high_pass_mask) /
+      input->num_frames_per_band();
   // Apply delay and sum and post-filter in the time domain. WARNING: only works
   // because delay-and-sum is not frequency dependent.
-  if (high_pass_split_input != NULL) {
-    // Ramp up/down for smoothing. 1 mask per 10ms results in audible
-    // discontinuities.
-    float ramp_inc =
-        (high_pass_postfilter_mask_ - old_high_pass_mask) / num_frames_per_band;
-    for (int i = 0; i < num_frames_per_band; ++i) {
-      old_high_pass_mask += ramp_inc;
+  for (int i = 1; i < input->num_bands(); ++i) {
+    float smoothed_mask = old_high_pass_mask;
+    for (int j = 0; j < input->num_frames_per_band(); ++j) {
+      smoothed_mask += ramp_increment;
 
       // Applying the delay and sum (at zero degrees, this is equivalent to
       // averaging).
       float sum = 0.f;
-      for (int j = 0; j < num_input_channels; ++j) {
-        sum += high_pass_split_input[j][i];
+      for (int k = 0; k < input->num_channels(); ++k) {
+        sum += input->channels(i)[k][j];
       }
-      high_pass_split_output[0][i] =
-          sum / num_input_channels * old_high_pass_mask;
+      output->channels(i)[0][j] = sum / input->num_channels() * smoothed_mask;
     }
   }
 }


@@ -36,17 +36,12 @@ class Beamformer : public LappedTransform::Callback {
   // Needs to be called before the Beamformer can be used.
   virtual void Initialize(int chunk_size_ms, int sample_rate_hz);
 
-  // Process one time-domain chunk of audio. The audio can be separated into
-  // two signals by frequency, with the higher half passed in as the second
-  // parameter. Use NULL for |high_pass_split_input| if you only have one
-  // audio signal. The number of frames and channels must correspond to the
-  // ctor parameters. The same signal can be passed in as |input| and |output|.
-  virtual void ProcessChunk(const float* const* input,
-                            const float* const* high_pass_split_input,
-                            int num_input_channels,
-                            int num_frames_per_band,
-                            float* const* output,
-                            float* const* high_pass_split_output);
+  // Process one time-domain chunk of audio. The audio is expected to be split
+  // into frequency bands inside the ChannelBuffer. The number of frames and
+  // channels must correspond to the constructor parameters. The same
+  // ChannelBuffer can be passed in as |input| and |output|.
+  virtual void ProcessChunk(const ChannelBuffer<float>* input,
+                            ChannelBuffer<float>* output);
   // After processing each block |is_target_present_| is set to true if the
   // target signal is present and to false otherwise. This method can be called
   // to know if the data is target signal or interference and process it
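
A hedged caller-side sketch of the new contract (not from this CL); the include path is assumed, and |bf| and |chunk| are assumed to have been set up elsewhere with matching sample rate and channel count:

// Caller-side sketch; the include path is assumed.
#include "webrtc/modules/audio_processing/beamformer/beamformer.h"

namespace webrtc {

// Processes one chunk in place; the comment above explicitly allows passing
// the same ChannelBuffer as |input| and |output|.
void RunBeamformerInPlace(Beamformer* bf, ChannelBuffer<float>* chunk) {
  bf->ProcessChunk(chunk, chunk);
  if (!bf->is_target_present()) {
    // A caller could, for example, attenuate or skip further processing here.
  }
}

}  // namespace webrtc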


@@ -71,12 +71,7 @@ int main(int argc, char* argv[]) {
       break;
     }
 
-    bf.ProcessChunk(captured_audio_cb.channels(),
-                    NULL,
-                    FLAGS_num_input_channels,
-                    kChunkSize,
-                    captured_audio_cb.channels(),
-                    NULL);
+    bf.ProcessChunk(&captured_audio_cb, &captured_audio_cb);
     webrtc::PcmWriteFromFloat(
         write_file, kChunkSize, 1, captured_audio_cb.channels());
   }


@@ -24,12 +24,8 @@ class MockBeamformer : public Beamformer {
   ~MockBeamformer() override;
 
   MOCK_METHOD2(Initialize, void(int chunk_size_ms, int sample_rate_hz));
-  MOCK_METHOD6(ProcessChunk, void(const float* const* input,
-                                  const float* const* high_pass_split_input,
-                                  int num_input_channels,
-                                  int num_frames_per_band,
-                                  float* const* output,
-                                  float* const* high_pass_split_output));
+  MOCK_METHOD2(ProcessChunk, void(const ChannelBuffer<float>* input,
+                                  ChannelBuffer<float>* output));
   MOCK_METHOD0(is_target_present, bool());
 };
 
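
A hedged sketch of how a test might exercise the new two-argument mock (not part of this CL); EXPECT_CALL and ::testing::_ are standard gmock, while the include paths and surrounding test wiring are assumed:

// Test-side sketch; include paths are assumed.
#include "testing/gmock/include/gmock/gmock.h"
#include "webrtc/modules/audio_processing/beamformer/mock_beamformer.h"

namespace webrtc {

// Expects exactly one chunk to go through the new
// (const ChannelBuffer<float>*, ChannelBuffer<float>*) overload.
void ExpectSingleChunk(MockBeamformer* mock) {
  EXPECT_CALL(*mock, ProcessChunk(::testing::_, ::testing::_)).Times(1);
}

}  // namespace webrtc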