diff --git a/webrtc/modules/audio_processing/audio_buffer.cc b/webrtc/modules/audio_processing/audio_buffer.cc index c4dff8a8d..e7419440e 100644 --- a/webrtc/modules/audio_processing/audio_buffer.cc +++ b/webrtc/modules/audio_processing/audio_buffer.cc @@ -244,6 +244,24 @@ int16_t* const* AudioBuffer::split_channels(Band band) { } } +ChannelBuffer<int16_t>* AudioBuffer::data() { + mixed_low_pass_valid_ = false; + return data_->ibuf(); +} + +const ChannelBuffer<int16_t>* AudioBuffer::data() const { + return data_->ibuf_const(); +} + +ChannelBuffer<int16_t>* AudioBuffer::split_data() { + mixed_low_pass_valid_ = false; + return split_data_.get() ? split_data_->ibuf() : data_->ibuf(); +} + +const ChannelBuffer<int16_t>* AudioBuffer::split_data() const { + return split_data_.get() ? split_data_->ibuf_const() : data_->ibuf_const(); +} + const float* const* AudioBuffer::channels_const_f() const { return data_->fbuf_const()->channels(); } @@ -283,6 +301,24 @@ float* const* AudioBuffer::split_channels_f(Band band) { } } +ChannelBuffer<float>* AudioBuffer::data_f() { + mixed_low_pass_valid_ = false; + return data_->fbuf(); +} + +const ChannelBuffer<float>* AudioBuffer::data_f() const { + return data_->fbuf_const(); +} + +ChannelBuffer<float>* AudioBuffer::split_data_f() { + mixed_low_pass_valid_ = false; + return split_data_.get() ? split_data_->fbuf() : data_->fbuf(); +} + +const ChannelBuffer<float>* AudioBuffer::split_data_f() const { + return split_data_.get() ? split_data_->fbuf_const() : data_->fbuf_const(); +} + const int16_t* AudioBuffer::mixed_low_pass_data() { // Currently only mixing stereo to mono is supported. 
assert(num_proc_channels_ == 1 || num_proc_channels_ == 2); diff --git a/webrtc/modules/audio_processing/audio_buffer.h b/webrtc/modules/audio_processing/audio_buffer.h index cdf033671..eb45fb2a7 100644 --- a/webrtc/modules/audio_processing/audio_buffer.h +++ b/webrtc/modules/audio_processing/audio_buffer.h @@ -50,30 +50,59 @@ class AudioBuffer { int num_keyboard_frames() const; int num_bands() const; - // Sample array accessors. Channels are guaranteed to be stored contiguously - // in memory. Prefer to use the const variants of each accessor when - // possible, since they incur less float<->int16 conversion overhead. + // Returns a pointer array to the full-band channels. + // Usage: + // channels()[channel][sample]. + // Where: + // 0 <= channel < |num_proc_channels_| + // 0 <= sample < |proc_num_frames_| int16_t* const* channels(); const int16_t* const* channels_const() const; + float* const* channels_f(); + const float* const* channels_const_f() const; + + // Returns a pointer array to the bands for a specific channel. + // Usage: + // split_bands(channel)[band][sample]. + // Where: + // 0 <= channel < |num_proc_channels_| + // 0 <= band < |num_bands_| + // 0 <= sample < |num_split_frames_| int16_t* const* split_bands(int channel); const int16_t* const* split_bands_const(int channel) const; + float* const* split_bands_f(int channel); + const float* const* split_bands_const_f(int channel) const; + + // Returns a pointer array to the channels for a specific band. + // Usage: + // split_channels(band)[channel][sample]. + // Where: + // 0 <= band < |num_bands_| + // 0 <= channel < |num_proc_channels_| + // 0 <= sample < |num_split_frames_| int16_t* const* split_channels(Band band); const int16_t* const* split_channels_const(Band band) const; + float* const* split_channels_f(Band band); + const float* const* split_channels_const_f(Band band) const; + + // Returns a pointer to the ChannelBuffer that encapsulates the full-band + // data. 
+ ChannelBuffer<int16_t>* data(); + const ChannelBuffer<int16_t>* data() const; + ChannelBuffer<float>* data_f(); + const ChannelBuffer<float>* data_f() const; + + // Returns a pointer to the ChannelBuffer that encapsulates the split data. + ChannelBuffer<int16_t>* split_data(); + const ChannelBuffer<int16_t>* split_data() const; + ChannelBuffer<float>* split_data_f(); + const ChannelBuffer<float>* split_data_f() const; // Returns a pointer to the low-pass data downmixed to mono. If this data // isn't already available it re-calculates it. const int16_t* mixed_low_pass_data(); const int16_t* low_pass_reference(int channel) const; - // Float versions of the accessors, with automatic conversion back and forth - // as necessary. The range of the numbers are the same as for int16_t. - float* const* channels_f(); - const float* const* channels_const_f() const; - float* const* split_bands_f(int channel); - const float* const* split_bands_const_f(int channel) const; - float* const* split_channels_f(Band band); - const float* const* split_channels_const_f(Band band) const; - const float* keyboard_data() const; void set_activity(AudioFrame::VADActivity activity); diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc index 06b60385c..091a5574e 100644 --- a/webrtc/modules/audio_processing/audio_processing_impl.cc +++ b/webrtc/modules/audio_processing/audio_processing_impl.cc @@ -610,12 +610,7 @@ int AudioProcessingImpl::ProcessStreamLocked() { #ifdef WEBRTC_BEAMFORMER if (beamformer_enabled_) { - beamformer_->ProcessChunk(ca->split_channels_const_f(kBand0To8kHz), - ca->split_channels_const_f(kBand8To16kHz), - ca->num_channels(), - ca->num_frames_per_band(), - ca->split_channels_f(kBand0To8kHz), - ca->split_channels_f(kBand8To16kHz)); + beamformer_->ProcessChunk(ca->split_data_f(), ca->split_data_f()); ca->set_num_channels(1); } #endif diff --git a/webrtc/modules/audio_processing/beamformer/beamformer.cc b/webrtc/modules/audio_processing/beamformer/beamformer.cc 
index 99fe84645..6cbe6127d 100644 --- a/webrtc/modules/audio_processing/beamformer/beamformer.cc +++ b/webrtc/modules/audio_processing/beamformer/beamformer.cc @@ -295,36 +295,32 @@ void Beamformer::InitInterfCovMats() { } } -void Beamformer::ProcessChunk(const float* const* input, - const float* const* high_pass_split_input, - int num_input_channels, - int num_frames_per_band, - float* const* output, - float* const* high_pass_split_output) { - CHECK_EQ(num_input_channels, num_input_channels_); - CHECK_EQ(num_frames_per_band, chunk_length_); +void Beamformer::ProcessChunk(const ChannelBuffer<float>* input, + ChannelBuffer<float>* output) { + DCHECK_EQ(input->num_channels(), num_input_channels_); + DCHECK_EQ(input->num_frames_per_band(), chunk_length_); float old_high_pass_mask = high_pass_postfilter_mask_; - lapped_transform_->ProcessChunk(input, output); - + lapped_transform_->ProcessChunk(input->channels(0), output->channels(0)); + // Ramp up/down for smoothing. 1 mask per 10ms results in audible + // discontinuities. + const float ramp_increment = + (high_pass_postfilter_mask_ - old_high_pass_mask) / + input->num_frames_per_band(); // Apply delay and sum and post-filter in the time domain. WARNING: only works // because delay-and-sum is not frequency dependent. - if (high_pass_split_input != NULL) { - // Ramp up/down for smoothing. 1 mask per 10ms results in audible - // discontinuities. - float ramp_inc = - (high_pass_postfilter_mask_ - old_high_pass_mask) / num_frames_per_band; - for (int i = 0; i < num_frames_per_band; ++i) { - old_high_pass_mask += ramp_inc; + for (int i = 1; i < input->num_bands(); ++i) { + float smoothed_mask = old_high_pass_mask; + for (int j = 0; j < input->num_frames_per_band(); ++j) { + smoothed_mask += ramp_increment; // Applying the delay and sum (at zero degrees, this is equivalent to // averaging). 
float sum = 0.f; - for (int j = 0; j < num_input_channels; ++j) { - sum += high_pass_split_input[j][i]; + for (int k = 0; k < input->num_channels(); ++k) { + sum += input->channels(i)[k][j]; } - high_pass_split_output[0][i] = - sum / num_input_channels * old_high_pass_mask; + output->channels(i)[0][j] = sum / input->num_channels() * smoothed_mask; } } } diff --git a/webrtc/modules/audio_processing/beamformer/beamformer.h b/webrtc/modules/audio_processing/beamformer/beamformer.h index 8af3547e2..c3b32ffcd 100644 --- a/webrtc/modules/audio_processing/beamformer/beamformer.h +++ b/webrtc/modules/audio_processing/beamformer/beamformer.h @@ -36,17 +36,12 @@ class Beamformer : public LappedTransform::Callback { // Needs to be called before the Beamformer can be used. virtual void Initialize(int chunk_size_ms, int sample_rate_hz); - // Process one time-domain chunk of audio. The audio can be separated into - // two signals by frequency, with the higher half passed in as the second - // parameter. Use NULL for |high_pass_split_input| if you only have one - // audio signal. The number of frames and channels must correspond to the - // ctor parameters. The same signal can be passed in as |input| and |output|. - virtual void ProcessChunk(const float* const* input, - const float* const* high_pass_split_input, - int num_input_channels, - int num_frames_per_band, - float* const* output, - float* const* high_pass_split_output); + // Process one time-domain chunk of audio. The audio is expected to be split + // into frequency bands inside the ChannelBuffer. The number of frames and + // channels must correspond to the constructor parameters. The same + // ChannelBuffer can be passed in as |input| and |output|. + virtual void ProcessChunk(const ChannelBuffer<float>* input, + ChannelBuffer<float>* output); // After processing each block |is_target_present_| is set to true if the // target signal es present and to false otherwise. 
This methods can be called // to know if the data is target signal or interference and process it diff --git a/webrtc/modules/audio_processing/beamformer/beamformer_test.cc b/webrtc/modules/audio_processing/beamformer/beamformer_test.cc index 74e845823..00487690e 100644 --- a/webrtc/modules/audio_processing/beamformer/beamformer_test.cc +++ b/webrtc/modules/audio_processing/beamformer/beamformer_test.cc @@ -71,12 +71,7 @@ int main(int argc, char* argv[]) { break; } - bf.ProcessChunk(captured_audio_cb.channels(), - NULL, - FLAGS_num_input_channels, - kChunkSize, - captured_audio_cb.channels(), - NULL); + bf.ProcessChunk(&captured_audio_cb, &captured_audio_cb); webrtc::PcmWriteFromFloat( write_file, kChunkSize, 1, captured_audio_cb.channels()); } diff --git a/webrtc/modules/audio_processing/beamformer/mock_beamformer.h b/webrtc/modules/audio_processing/beamformer/mock_beamformer.h index 2c04a12ae..58995def8 100644 --- a/webrtc/modules/audio_processing/beamformer/mock_beamformer.h +++ b/webrtc/modules/audio_processing/beamformer/mock_beamformer.h @@ -24,12 +24,8 @@ class MockBeamformer : public Beamformer { ~MockBeamformer() override; MOCK_METHOD2(Initialize, void(int chunk_size_ms, int sample_rate_hz)); - MOCK_METHOD6(ProcessChunk, void(const float* const* input, - const float* const* high_pass_split_input, - int num_input_channels, - int num_frames_per_band, - float* const* output, - float* const* high_pass_split_output)); + MOCK_METHOD2(ProcessChunk, void(const ChannelBuffer<float>* input, + ChannelBuffer<float>* output)); MOCK_METHOD0(is_target_present, bool()); };