diff --git a/webrtc/modules/audio_processing/audio_buffer.cc b/webrtc/modules/audio_processing/audio_buffer.cc
index 6936155a1..c53d4df98 100644
--- a/webrtc/modules/audio_processing/audio_buffer.cc
+++ b/webrtc/modules/audio_processing/audio_buffer.cc
@@ -23,6 +23,35 @@ enum {
   kSamplesPer32kHzChannel = 320
 };
 
+bool HasKeyboardChannel(AudioProcessing::ChannelLayout layout) {
+  switch (layout) {
+    case AudioProcessing::kMono:
+    case AudioProcessing::kStereo:
+      return false;
+    case AudioProcessing::kMonoAndKeyboard:
+    case AudioProcessing::kStereoAndKeyboard:
+      return true;
+  }
+  assert(false);
+  return false;
+}
+
+int KeyboardChannelIndex(AudioProcessing::ChannelLayout layout) {
+  switch (layout) {
+    case AudioProcessing::kMono:
+    case AudioProcessing::kStereo:
+      assert(false);
+      return -1;
+    case AudioProcessing::kMonoAndKeyboard:
+      return 1;
+    case AudioProcessing::kStereoAndKeyboard:
+      return 2;
+  }
+  assert(false);
+  return -1;
+}
+
+
 void StereoToMono(const float* left, const float* right, float* out,
                   int samples_per_channel) {
   for (int i = 0; i < samples_per_channel; ++i) {
@@ -32,8 +61,9 @@ void StereoToMono(const float* left, const float* right, float* out,
 
 void StereoToMono(const int16_t* left, const int16_t* right, int16_t* out,
                   int samples_per_channel) {
-  for (int i = 0; i < samples_per_channel; i++)
+  for (int i = 0; i < samples_per_channel; ++i) {
     out[i] = (left[i] + right[i]) >> 1;
+  }
 }
 
 }  // namespace
@@ -72,6 +102,7 @@ AudioBuffer::AudioBuffer(int input_samples_per_channel,
       activity_(AudioFrame::kVadUnknown),
       is_muted_(false),
       data_(NULL),
+      keyboard_data_(NULL),
      channels_(new ChannelBuffer<int16_t>(proc_samples_per_channel_,
                                           num_proc_channels_)) {
   assert(input_samples_per_channel_ > 0);
@@ -118,6 +149,8 @@ AudioBuffer::AudioBuffer(int input_samples_per_channel,
   }
 }
 
+AudioBuffer::~AudioBuffer() {}
+
 void AudioBuffer::CopyFrom(const float* const* data,
                            int samples_per_channel,
                            AudioProcessing::ChannelLayout layout) {
@@ -125,6 +158,10 @@ void AudioBuffer::CopyFrom(const float* const* data,
   assert(ChannelsFromLayout(layout) == num_input_channels_);
   InitForNewData();
 
+  if (HasKeyboardChannel(layout)) {
+    keyboard_data_ = data[KeyboardChannelIndex(layout)];
+  }
+
   // Downmix.
   const float* const* data_ptr = data;
   if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
@@ -180,10 +217,9 @@ void AudioBuffer::CopyTo(int samples_per_channel,
   }
 }
 
-AudioBuffer::~AudioBuffer() {}
-
 void AudioBuffer::InitForNewData() {
   data_ = NULL;
+  keyboard_data_ = NULL;
   data_was_mixed_ = false;
   num_mixed_channels_ = 0;
   num_mixed_low_pass_channels_ = 0;
@@ -240,6 +276,10 @@ int16_t* AudioBuffer::low_pass_reference(int channel) const {
   return low_pass_reference_channels_->channel(channel);
 }
 
+const float* AudioBuffer::keyboard_data() const {
+  return keyboard_data_;
+}
+
 SplitFilterStates* AudioBuffer::filter_states(int channel) const {
   assert(channel >= 0 && channel < num_proc_channels_);
   return &filter_states_[channel];
 }
@@ -269,6 +309,11 @@ int AudioBuffer::samples_per_split_channel() const {
   return samples_per_split_channel_;
 }
 
+int AudioBuffer::samples_per_keyboard_channel() const {
+  // We don't resample the keyboard channel.
+  return input_samples_per_channel_;
+}
+
 // TODO(andrew): Do deinterleaving and mixing in one step?
 void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
   assert(proc_samples_per_channel_ == input_samples_per_channel_);
diff --git a/webrtc/modules/audio_processing/audio_buffer.h b/webrtc/modules/audio_processing/audio_buffer.h
index 45e62a450..eaf53eb6b 100644
--- a/webrtc/modules/audio_processing/audio_buffer.h
+++ b/webrtc/modules/audio_processing/audio_buffer.h
@@ -53,6 +53,7 @@ class AudioBuffer {
   int num_channels() const;
   int samples_per_channel() const;
   int samples_per_split_channel() const;
+  int samples_per_keyboard_channel() const;
 
   int16_t* data(int channel) const;
   int16_t* low_pass_split_data(int channel) const;
@@ -60,6 +61,7 @@
   int16_t* mixed_data(int channel) const;
   int16_t* mixed_low_pass_data(int channel) const;
   int16_t* low_pass_reference(int channel) const;
+  const float* keyboard_data() const;
 
   SplitFilterStates* filter_states(int channel) const;
 
@@ -106,6 +108,7 @@
   bool is_muted_;
 
   int16_t* data_;
+  const float* keyboard_data_;
   scoped_ptr<ChannelBuffer<int16_t> > channels_;
   scoped_ptr<SplitChannelBuffer> split_channels_;
   scoped_ptr<SplitFilterStates[]> filter_states_;
diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc
index 147cb1823..de387edb2 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl.cc
+++ b/webrtc/modules/audio_processing/audio_processing_impl.cc
@@ -468,48 +468,46 @@ int AudioProcessingImpl::ProcessStreamLocked() {
   }
 #endif
 
+  AudioBuffer* ca = capture_audio_.get();  // For brevity.
   bool data_processed = is_data_processed();
   if (analysis_needed(data_processed)) {
     for (int i = 0; i < fwd_proc_format_.num_channels(); i++) {
-      SplitFilterStates* filter_states = capture_audio_->filter_states(i);
       // Split into a low and high band.
-      WebRtcSpl_AnalysisQMF(capture_audio_->data(i),
-                            capture_audio_->samples_per_channel(),
-                            capture_audio_->low_pass_split_data(i),
-                            capture_audio_->high_pass_split_data(i),
-                            filter_states->analysis_filter_state1,
-                            filter_states->analysis_filter_state2);
+      WebRtcSpl_AnalysisQMF(ca->data(i),
+                            ca->samples_per_channel(),
+                            ca->low_pass_split_data(i),
+                            ca->high_pass_split_data(i),
+                            ca->filter_states(i)->analysis_filter_state1,
+                            ca->filter_states(i)->analysis_filter_state2);
     }
   }
 
-  RETURN_ON_ERR(high_pass_filter_->ProcessCaptureAudio(capture_audio_.get()));
-  RETURN_ON_ERR(gain_control_->AnalyzeCaptureAudio(capture_audio_.get()));
-  RETURN_ON_ERR(echo_cancellation_->ProcessCaptureAudio(capture_audio_.get()));
+  RETURN_ON_ERR(high_pass_filter_->ProcessCaptureAudio(ca));
+  RETURN_ON_ERR(gain_control_->AnalyzeCaptureAudio(ca));
+  RETURN_ON_ERR(echo_cancellation_->ProcessCaptureAudio(ca));
 
   if (echo_control_mobile_->is_enabled() && noise_suppression_->is_enabled()) {
-    capture_audio_->CopyLowPassToReference();
+    ca->CopyLowPassToReference();
   }
 
-  RETURN_ON_ERR(noise_suppression_->ProcessCaptureAudio(capture_audio_.get()));
-  RETURN_ON_ERR(
-      echo_control_mobile_->ProcessCaptureAudio(capture_audio_.get()));
-  RETURN_ON_ERR(voice_detection_->ProcessCaptureAudio(capture_audio_.get()));
-  RETURN_ON_ERR(gain_control_->ProcessCaptureAudio(capture_audio_.get()));
+  RETURN_ON_ERR(noise_suppression_->ProcessCaptureAudio(ca));
+  RETURN_ON_ERR(echo_control_mobile_->ProcessCaptureAudio(ca));
+  RETURN_ON_ERR(voice_detection_->ProcessCaptureAudio(ca));
+  RETURN_ON_ERR(gain_control_->ProcessCaptureAudio(ca));
 
   if (synthesis_needed(data_processed)) {
     for (int i = 0; i < fwd_proc_format_.num_channels(); i++) {
       // Recombine low and high bands.
-      SplitFilterStates* filter_states = capture_audio_->filter_states(i);
-      WebRtcSpl_SynthesisQMF(capture_audio_->low_pass_split_data(i),
-                             capture_audio_->high_pass_split_data(i),
-                             capture_audio_->samples_per_split_channel(),
-                             capture_audio_->data(i),
-                             filter_states->synthesis_filter_state1,
-                             filter_states->synthesis_filter_state2);
+      WebRtcSpl_SynthesisQMF(ca->low_pass_split_data(i),
+                             ca->high_pass_split_data(i),
+                             ca->samples_per_split_channel(),
+                             ca->data(i),
+                             ca->filter_states(i)->synthesis_filter_state1,
+                             ca->filter_states(i)->synthesis_filter_state2);
     }
   }
 
   // The level estimator operates on the recombined data.
-  RETURN_ON_ERR(level_estimator_->ProcessStream(capture_audio_.get()));
+  RETURN_ON_ERR(level_estimator_->ProcessStream(ca));
 
   was_stream_delay_set_ = false;
   return kNoError;
@@ -592,27 +590,23 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
   return AnalyzeReverseStreamLocked();
 }
 
-// TODO(ajm): Have AnalyzeReverseStream accept sample rates not matching the
-// primary stream and convert ourselves rather than having the user manage it.
-// We can be smarter and use the splitting filter when appropriate. Similarly,
-// perform downmixing here.
 int AudioProcessingImpl::AnalyzeReverseStreamLocked() {
+  AudioBuffer* ra = render_audio_.get();  // For brevity.
   if (rev_proc_format_.rate() == kSampleRate32kHz) {
     for (int i = 0; i < rev_proc_format_.num_channels(); i++) {
       // Split into low and high band.
-      SplitFilterStates* filter_states = render_audio_->filter_states(i);
-      WebRtcSpl_AnalysisQMF(render_audio_->data(i),
-                            render_audio_->samples_per_channel(),
-                            render_audio_->low_pass_split_data(i),
-                            render_audio_->high_pass_split_data(i),
-                            filter_states->analysis_filter_state1,
-                            filter_states->analysis_filter_state2);
+      WebRtcSpl_AnalysisQMF(ra->data(i),
+                            ra->samples_per_channel(),
+                            ra->low_pass_split_data(i),
+                            ra->high_pass_split_data(i),
+                            ra->filter_states(i)->analysis_filter_state1,
+                            ra->filter_states(i)->analysis_filter_state2);
     }
   }
 
-  RETURN_ON_ERR(echo_cancellation_->ProcessRenderAudio(render_audio_.get()));
-  RETURN_ON_ERR(echo_control_mobile_->ProcessRenderAudio(render_audio_.get()));
-  RETURN_ON_ERR(gain_control_->ProcessRenderAudio(render_audio_.get()));
+  RETURN_ON_ERR(echo_cancellation_->ProcessRenderAudio(ra));
+  RETURN_ON_ERR(echo_control_mobile_->ProcessRenderAudio(ra));
+  RETURN_ON_ERR(gain_control_->ProcessRenderAudio(ra));
 
   return kNoError;
 }
diff --git a/webrtc/modules/audio_processing/test/audio_processing_unittest.cc b/webrtc/modules/audio_processing/test/audio_processing_unittest.cc
index 8976adf92..0c5b67df5 100644
--- a/webrtc/modules/audio_processing/test/audio_processing_unittest.cc
+++ b/webrtc/modules/audio_processing/test/audio_processing_unittest.cc
@@ -81,6 +81,21 @@ void ConvertToFloat(const AudioFrame& frame, ChannelBuffer<float>* cb) {
   ConvertToFloat(frame.data_, cb);
 }
 
+// Number of channels including the keyboard channel.
+int TotalChannelsFromLayout(AudioProcessing::ChannelLayout layout) {
+  switch (layout) {
+    case AudioProcessing::kMono:
+      return 1;
+    case AudioProcessing::kMonoAndKeyboard:
+    case AudioProcessing::kStereo:
+      return 2;
+    case AudioProcessing::kStereoAndKeyboard:
+      return 3;
+  }
+  assert(false);
+  return -1;
+}
+
 int TruncateToMultipleOf10(int value) {
   return (value / 10) * 10;
 }
@@ -1916,6 +1931,43 @@ TEST_F(ApmTest, DISABLED_ON_ANDROID(Process)) {
 
 #endif  // WEBRTC_AUDIOPROC_BIT_EXACT
 
+TEST_F(ApmTest, NoErrorsWithKeyboardChannel) {
+  struct ChannelFormat {
+    AudioProcessing::ChannelLayout in_layout;
+    AudioProcessing::ChannelLayout out_layout;
+  };
+  ChannelFormat cf[] = {
+    {AudioProcessing::kMonoAndKeyboard, AudioProcessing::kMono},
+    {AudioProcessing::kStereoAndKeyboard, AudioProcessing::kMono},
+    {AudioProcessing::kStereoAndKeyboard, AudioProcessing::kStereo},
+  };
+  size_t channel_format_size = sizeof(cf) / sizeof(*cf);
+
+  scoped_ptr<AudioProcessing> ap(AudioProcessing::Create());
+  // Enable one component just to ensure some processing takes place.
+  ap->noise_suppression()->Enable(true);
+  for (size_t i = 0; i < channel_format_size; ++i) {
+    const int in_rate = 44100;
+    const int out_rate = 48000;
+    ChannelBuffer<float> in_cb(SamplesFromRate(in_rate),
+                               TotalChannelsFromLayout(cf[i].in_layout));
+    ChannelBuffer<float> out_cb(SamplesFromRate(out_rate),
+                                ChannelsFromLayout(cf[i].out_layout));
+
+    // Run over a few chunks.
+    for (int j = 0; j < 10; ++j) {
+      EXPECT_NOERR(ap->ProcessStream(
+          in_cb.channels(),
+          in_cb.samples_per_channel(),
+          in_rate,
+          cf[i].in_layout,
+          out_rate,
+          cf[i].out_layout,
+          out_cb.channels()));
+    }
+  }
+}
+
 // Reads a 10 ms chunk of int16 interleaved audio from the given (assumed
 // stereo) file, converts to deinterleaved float (optionally downmixing) and
 // returns the result in |cb|. Returns false if the file ended (or on error) and
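
For context, here is a small standalone sketch (not part of the patch) of how a consumer might locate and inspect the keyboard channel in a deinterleaved float buffer, mirroring the HasKeyboardChannel()/KeyboardChannelIndex() helpers added in audio_buffer.cc. The local ChannelLayout enum and the ComputeRms() helper are illustrative stand-ins, not WebRTC APIs; inside APM a component would instead read AudioBuffer::keyboard_data() and samples_per_keyboard_channel(), which report the unresampled input because CopyFrom() only stashes a pointer to the keyboard channel.

// Standalone illustration only. Mirrors the layout helpers added in
// audio_buffer.cc; the ChannelLayout enum and ComputeRms() here are local
// stand-ins for the real AudioProcessing types.
#include <cassert>
#include <cmath>
#include <cstdio>
#include <vector>

namespace {

enum ChannelLayout { kMono, kMonoAndKeyboard, kStereo, kStereoAndKeyboard };

bool HasKeyboardChannel(ChannelLayout layout) {
  return layout == kMonoAndKeyboard || layout == kStereoAndKeyboard;
}

int KeyboardChannelIndex(ChannelLayout layout) {
  assert(HasKeyboardChannel(layout));
  // The keyboard channel follows the voice channels: index 1 for mono,
  // index 2 for stereo.
  return layout == kMonoAndKeyboard ? 1 : 2;
}

// Root-mean-square level of one deinterleaved channel.
float ComputeRms(const float* channel, int samples) {
  float sum_squares = 0.f;
  for (int i = 0; i < samples; ++i) {
    sum_squares += channel[i] * channel[i];
  }
  return std::sqrt(sum_squares / samples);
}

}  // namespace

int main() {
  const int kSamples = 480;  // 10 ms at 48 kHz; the keyboard channel is not resampled.
  const ChannelLayout layout = kStereoAndKeyboard;

  // Deinterleaved float data: left, right, keyboard.
  std::vector<std::vector<float> > channels(3, std::vector<float>(kSamples, 0.f));
  channels[2][0] = 0.5f;  // Pretend a key click landed in the keyboard channel.

  if (HasKeyboardChannel(layout)) {
    const float* keyboard = channels[KeyboardChannelIndex(layout)].data();
    std::printf("keyboard RMS: %f\n", ComputeRms(keyboard, kSamples));
  }
  return 0;
}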