Add keyboard channel support to AudioBuffer.

Also use local aliases for AudioBuffers for brevity.

BUG=2894
R=aluebs@webrtc.org, bjornv@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/13369005

git-svn-id: http://webrtc.googlecode.com/svn/trunk@5973 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
andrew@webrtc.org 2014-04-24 18:28:56 +00:00
parent d57b8149c2
commit 103657b484
4 changed files with 135 additions and 41 deletions

View File

@ -23,6 +23,35 @@ enum {
kSamplesPer32kHzChannel = 320 kSamplesPer32kHzChannel = 320
}; };
// Returns true for layouts whose last channel is a keyboard-mic channel.
bool HasKeyboardChannel(AudioProcessing::ChannelLayout layout) {
  switch (layout) {
    case AudioProcessing::kMonoAndKeyboard:
    case AudioProcessing::kStereoAndKeyboard:
      return true;
    case AudioProcessing::kMono:
    case AudioProcessing::kStereo:
      return false;
  }
  // Unreachable for valid layouts; guard against out-of-range enum values.
  assert(false);
  return false;
}
// Returns the index of the keyboard channel within |layout|. Layouts without
// a keyboard channel trigger an assertion and yield -1.
int KeyboardChannelIndex(AudioProcessing::ChannelLayout layout) {
  switch (layout) {
    case AudioProcessing::kMonoAndKeyboard:
      return 1;
    case AudioProcessing::kStereoAndKeyboard:
      return 2;
    case AudioProcessing::kMono:
    case AudioProcessing::kStereo:
      // Caller should have checked HasKeyboardChannel() first.
      assert(false);
      return -1;
  }
  // Unreachable for valid layouts; guard against out-of-range enum values.
  assert(false);
  return -1;
}
void StereoToMono(const float* left, const float* right, float* out, void StereoToMono(const float* left, const float* right, float* out,
int samples_per_channel) { int samples_per_channel) {
for (int i = 0; i < samples_per_channel; ++i) { for (int i = 0; i < samples_per_channel; ++i) {
@ -32,8 +61,9 @@ void StereoToMono(const float* left, const float* right, float* out,
void StereoToMono(const int16_t* left, const int16_t* right, int16_t* out, void StereoToMono(const int16_t* left, const int16_t* right, int16_t* out,
int samples_per_channel) { int samples_per_channel) {
for (int i = 0; i < samples_per_channel; i++) for (int i = 0; i < samples_per_channel; ++i) {
out[i] = (left[i] + right[i]) >> 1; out[i] = (left[i] + right[i]) >> 1;
}
} }
} // namespace } // namespace
@ -72,6 +102,7 @@ AudioBuffer::AudioBuffer(int input_samples_per_channel,
activity_(AudioFrame::kVadUnknown), activity_(AudioFrame::kVadUnknown),
is_muted_(false), is_muted_(false),
data_(NULL), data_(NULL),
keyboard_data_(NULL),
channels_(new ChannelBuffer<int16_t>(proc_samples_per_channel_, channels_(new ChannelBuffer<int16_t>(proc_samples_per_channel_,
num_proc_channels_)) { num_proc_channels_)) {
assert(input_samples_per_channel_ > 0); assert(input_samples_per_channel_ > 0);
@ -118,6 +149,8 @@ AudioBuffer::AudioBuffer(int input_samples_per_channel,
} }
} }
AudioBuffer::~AudioBuffer() {}
void AudioBuffer::CopyFrom(const float* const* data, void AudioBuffer::CopyFrom(const float* const* data,
int samples_per_channel, int samples_per_channel,
AudioProcessing::ChannelLayout layout) { AudioProcessing::ChannelLayout layout) {
@ -125,6 +158,10 @@ void AudioBuffer::CopyFrom(const float* const* data,
assert(ChannelsFromLayout(layout) == num_input_channels_); assert(ChannelsFromLayout(layout) == num_input_channels_);
InitForNewData(); InitForNewData();
if (HasKeyboardChannel(layout)) {
keyboard_data_ = data[KeyboardChannelIndex(layout)];
}
// Downmix. // Downmix.
const float* const* data_ptr = data; const float* const* data_ptr = data;
if (num_input_channels_ == 2 && num_proc_channels_ == 1) { if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
@ -180,10 +217,9 @@ void AudioBuffer::CopyTo(int samples_per_channel,
} }
} }
AudioBuffer::~AudioBuffer() {}
void AudioBuffer::InitForNewData() { void AudioBuffer::InitForNewData() {
data_ = NULL; data_ = NULL;
keyboard_data_ = NULL;
data_was_mixed_ = false; data_was_mixed_ = false;
num_mixed_channels_ = 0; num_mixed_channels_ = 0;
num_mixed_low_pass_channels_ = 0; num_mixed_low_pass_channels_ = 0;
@ -240,6 +276,10 @@ int16_t* AudioBuffer::low_pass_reference(int channel) const {
return low_pass_reference_channels_->channel(channel); return low_pass_reference_channels_->channel(channel);
} }
// Returns the keyboard channel captured by the most recent CopyFrom() call,
// or NULL if no input with a keyboard channel has been provided since the
// last InitForNewData().
const float* AudioBuffer::keyboard_data() const {
return keyboard_data_;
}
SplitFilterStates* AudioBuffer::filter_states(int channel) const { SplitFilterStates* AudioBuffer::filter_states(int channel) const {
assert(channel >= 0 && channel < num_proc_channels_); assert(channel >= 0 && channel < num_proc_channels_);
return &filter_states_[channel]; return &filter_states_[channel];
@ -269,6 +309,11 @@ int AudioBuffer::samples_per_split_channel() const {
return samples_per_split_channel_; return samples_per_split_channel_;
} }
// Number of samples in the keyboard channel. This can differ from
// samples_per_channel() because the keyboard data stays at the input rate.
int AudioBuffer::samples_per_keyboard_channel() const {
// We don't resample the keyboard channel.
return input_samples_per_channel_;
}
// TODO(andrew): Do deinterleaving and mixing in one step? // TODO(andrew): Do deinterleaving and mixing in one step?
void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) { void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
assert(proc_samples_per_channel_ == input_samples_per_channel_); assert(proc_samples_per_channel_ == input_samples_per_channel_);

View File

@ -53,6 +53,7 @@ class AudioBuffer {
int num_channels() const; int num_channels() const;
int samples_per_channel() const; int samples_per_channel() const;
int samples_per_split_channel() const; int samples_per_split_channel() const;
int samples_per_keyboard_channel() const;
int16_t* data(int channel) const; int16_t* data(int channel) const;
int16_t* low_pass_split_data(int channel) const; int16_t* low_pass_split_data(int channel) const;
@ -60,6 +61,7 @@ class AudioBuffer {
int16_t* mixed_data(int channel) const; int16_t* mixed_data(int channel) const;
int16_t* mixed_low_pass_data(int channel) const; int16_t* mixed_low_pass_data(int channel) const;
int16_t* low_pass_reference(int channel) const; int16_t* low_pass_reference(int channel) const;
const float* keyboard_data() const;
SplitFilterStates* filter_states(int channel) const; SplitFilterStates* filter_states(int channel) const;
@ -106,6 +108,7 @@ class AudioBuffer {
bool is_muted_; bool is_muted_;
int16_t* data_; int16_t* data_;
const float* keyboard_data_;
scoped_ptr<ChannelBuffer<int16_t> > channels_; scoped_ptr<ChannelBuffer<int16_t> > channels_;
scoped_ptr<SplitChannelBuffer> split_channels_; scoped_ptr<SplitChannelBuffer> split_channels_;
scoped_ptr<SplitFilterStates[]> filter_states_; scoped_ptr<SplitFilterStates[]> filter_states_;

View File

@ -468,48 +468,46 @@ int AudioProcessingImpl::ProcessStreamLocked() {
} }
#endif #endif
AudioBuffer* ca = capture_audio_.get(); // For brevity.
bool data_processed = is_data_processed(); bool data_processed = is_data_processed();
if (analysis_needed(data_processed)) { if (analysis_needed(data_processed)) {
for (int i = 0; i < fwd_proc_format_.num_channels(); i++) { for (int i = 0; i < fwd_proc_format_.num_channels(); i++) {
SplitFilterStates* filter_states = capture_audio_->filter_states(i);
// Split into a low and high band. // Split into a low and high band.
WebRtcSpl_AnalysisQMF(capture_audio_->data(i), WebRtcSpl_AnalysisQMF(ca->data(i),
capture_audio_->samples_per_channel(), ca->samples_per_channel(),
capture_audio_->low_pass_split_data(i), ca->low_pass_split_data(i),
capture_audio_->high_pass_split_data(i), ca->high_pass_split_data(i),
filter_states->analysis_filter_state1, ca->filter_states(i)->analysis_filter_state1,
filter_states->analysis_filter_state2); ca->filter_states(i)->analysis_filter_state2);
} }
} }
RETURN_ON_ERR(high_pass_filter_->ProcessCaptureAudio(capture_audio_.get())); RETURN_ON_ERR(high_pass_filter_->ProcessCaptureAudio(ca));
RETURN_ON_ERR(gain_control_->AnalyzeCaptureAudio(capture_audio_.get())); RETURN_ON_ERR(gain_control_->AnalyzeCaptureAudio(ca));
RETURN_ON_ERR(echo_cancellation_->ProcessCaptureAudio(capture_audio_.get())); RETURN_ON_ERR(echo_cancellation_->ProcessCaptureAudio(ca));
if (echo_control_mobile_->is_enabled() && noise_suppression_->is_enabled()) { if (echo_control_mobile_->is_enabled() && noise_suppression_->is_enabled()) {
capture_audio_->CopyLowPassToReference(); ca->CopyLowPassToReference();
} }
RETURN_ON_ERR(noise_suppression_->ProcessCaptureAudio(capture_audio_.get())); RETURN_ON_ERR(noise_suppression_->ProcessCaptureAudio(ca));
RETURN_ON_ERR( RETURN_ON_ERR(echo_control_mobile_->ProcessCaptureAudio(ca));
echo_control_mobile_->ProcessCaptureAudio(capture_audio_.get())); RETURN_ON_ERR(voice_detection_->ProcessCaptureAudio(ca));
RETURN_ON_ERR(voice_detection_->ProcessCaptureAudio(capture_audio_.get())); RETURN_ON_ERR(gain_control_->ProcessCaptureAudio(ca));
RETURN_ON_ERR(gain_control_->ProcessCaptureAudio(capture_audio_.get()));
if (synthesis_needed(data_processed)) { if (synthesis_needed(data_processed)) {
for (int i = 0; i < fwd_proc_format_.num_channels(); i++) { for (int i = 0; i < fwd_proc_format_.num_channels(); i++) {
// Recombine low and high bands. // Recombine low and high bands.
SplitFilterStates* filter_states = capture_audio_->filter_states(i); WebRtcSpl_SynthesisQMF(ca->low_pass_split_data(i),
WebRtcSpl_SynthesisQMF(capture_audio_->low_pass_split_data(i), ca->high_pass_split_data(i),
capture_audio_->high_pass_split_data(i), ca->samples_per_split_channel(),
capture_audio_->samples_per_split_channel(), ca->data(i),
capture_audio_->data(i), ca->filter_states(i)->synthesis_filter_state1,
filter_states->synthesis_filter_state1, ca->filter_states(i)->synthesis_filter_state2);
filter_states->synthesis_filter_state2);
} }
} }
// The level estimator operates on the recombined data. // The level estimator operates on the recombined data.
RETURN_ON_ERR(level_estimator_->ProcessStream(capture_audio_.get())); RETURN_ON_ERR(level_estimator_->ProcessStream(ca));
was_stream_delay_set_ = false; was_stream_delay_set_ = false;
return kNoError; return kNoError;
@ -592,27 +590,23 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
return AnalyzeReverseStreamLocked(); return AnalyzeReverseStreamLocked();
} }
// TODO(ajm): Have AnalyzeReverseStream accept sample rates not matching the
// primary stream and convert ourselves rather than having the user manage it.
// We can be smarter and use the splitting filter when appropriate. Similarly,
// perform downmixing here.
int AudioProcessingImpl::AnalyzeReverseStreamLocked() { int AudioProcessingImpl::AnalyzeReverseStreamLocked() {
AudioBuffer* ra = render_audio_.get(); // For brevity.
if (rev_proc_format_.rate() == kSampleRate32kHz) { if (rev_proc_format_.rate() == kSampleRate32kHz) {
for (int i = 0; i < rev_proc_format_.num_channels(); i++) { for (int i = 0; i < rev_proc_format_.num_channels(); i++) {
// Split into low and high band. // Split into low and high band.
SplitFilterStates* filter_states = render_audio_->filter_states(i); WebRtcSpl_AnalysisQMF(ra->data(i),
WebRtcSpl_AnalysisQMF(render_audio_->data(i), ra->samples_per_channel(),
render_audio_->samples_per_channel(), ra->low_pass_split_data(i),
render_audio_->low_pass_split_data(i), ra->high_pass_split_data(i),
render_audio_->high_pass_split_data(i), ra->filter_states(i)->analysis_filter_state1,
filter_states->analysis_filter_state1, ra->filter_states(i)->analysis_filter_state2);
filter_states->analysis_filter_state2);
} }
} }
RETURN_ON_ERR(echo_cancellation_->ProcessRenderAudio(render_audio_.get())); RETURN_ON_ERR(echo_cancellation_->ProcessRenderAudio(ra));
RETURN_ON_ERR(echo_control_mobile_->ProcessRenderAudio(render_audio_.get())); RETURN_ON_ERR(echo_control_mobile_->ProcessRenderAudio(ra));
RETURN_ON_ERR(gain_control_->ProcessRenderAudio(render_audio_.get())); RETURN_ON_ERR(gain_control_->ProcessRenderAudio(ra));
return kNoError; return kNoError;
} }

View File

@ -81,6 +81,21 @@ void ConvertToFloat(const AudioFrame& frame, ChannelBuffer<float>* cb) {
ConvertToFloat(frame.data_, cb); ConvertToFloat(frame.data_, cb);
} }
// Returns the channel count of |layout| counting the keyboard channel
// (unlike ChannelsFromLayout, which excludes it).
int TotalChannelsFromLayout(AudioProcessing::ChannelLayout layout) {
  switch (layout) {
    case AudioProcessing::kMono:
      return 1;
    case AudioProcessing::kStereo:
    case AudioProcessing::kMonoAndKeyboard:
      return 2;
    case AudioProcessing::kStereoAndKeyboard:
      return 3;
  }
  // Unreachable for valid layouts; guard against out-of-range enum values.
  assert(false);
  return -1;
}
int TruncateToMultipleOf10(int value) { int TruncateToMultipleOf10(int value) {
return (value / 10) * 10; return (value / 10) * 10;
} }
@ -1916,6 +1931,43 @@ TEST_F(ApmTest, DISABLED_ON_ANDROID(Process)) {
#endif // WEBRTC_AUDIOPROC_BIT_EXACT #endif // WEBRTC_AUDIOPROC_BIT_EXACT
// Verifies ProcessStream() reports no errors for each supported pairing of a
// keyboard-bearing input layout with a keyboard-free output layout, across a
// 44.1 kHz -> 48 kHz rate change.
TEST_F(ApmTest, NoErrorsWithKeyboardChannel) {
  struct ChannelFormat {
    AudioProcessing::ChannelLayout in_layout;
    AudioProcessing::ChannelLayout out_layout;
  };
  ChannelFormat cf[] = {
    {AudioProcessing::kMonoAndKeyboard, AudioProcessing::kMono},
    {AudioProcessing::kStereoAndKeyboard, AudioProcessing::kMono},
    {AudioProcessing::kStereoAndKeyboard, AudioProcessing::kStereo},
  };
  const size_t num_formats = sizeof(cf) / sizeof(*cf);
  const int in_rate = 44100;
  const int out_rate = 48000;

  scoped_ptr<AudioProcessing> ap(AudioProcessing::Create());
  // Enable one component just to ensure some processing takes place.
  ap->noise_suppression()->Enable(true);
  for (size_t i = 0; i < num_formats; ++i) {
    ChannelBuffer<float> in_cb(SamplesFromRate(in_rate),
                               TotalChannelsFromLayout(cf[i].in_layout));
    ChannelBuffer<float> out_cb(SamplesFromRate(out_rate),
                                ChannelsFromLayout(cf[i].out_layout));

    // Run over a few chunks so state carried between calls is exercised.
    for (int chunk = 0; chunk < 10; ++chunk) {
      EXPECT_NOERR(ap->ProcessStream(in_cb.channels(),
                                     in_cb.samples_per_channel(),
                                     in_rate,
                                     cf[i].in_layout,
                                     out_rate,
                                     cf[i].out_layout,
                                     out_cb.channels()));
    }
  }
}
// Reads a 10 ms chunk of int16 interleaved audio from the given (assumed // Reads a 10 ms chunk of int16 interleaved audio from the given (assumed
// stereo) file, converts to deinterleaved float (optionally downmixing) and // stereo) file, converts to deinterleaved float (optionally downmixing) and
// returns the result in |cb|. Returns false if the file ended (or on error) and // returns the result in |cb|. Returns false if the file ended (or on error) and