diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc index 5d7de3a1f..2fa6f28be 100644 --- a/webrtc/modules/audio_processing/audio_processing_impl.cc +++ b/webrtc/modules/audio_processing/audio_processing_impl.cc @@ -137,11 +137,16 @@ class GainControlForNewAgc : public GainControl, public VolumeCallbacks { AudioProcessing* AudioProcessing::Create() { Config config; - return Create(config); + return Create(config, nullptr); } AudioProcessing* AudioProcessing::Create(const Config& config) { - AudioProcessingImpl* apm = new AudioProcessingImpl(config); + return Create(config, nullptr); +} + +AudioProcessing* AudioProcessing::Create(const Config& config, + Beamformer* beamformer) { + AudioProcessingImpl* apm = new AudioProcessingImpl(config, beamformer); if (apm->Initialize() != kNoError) { delete apm; apm = NULL; @@ -151,6 +156,10 @@ AudioProcessing* AudioProcessing::Create(const Config& config) { } AudioProcessingImpl::AudioProcessingImpl(const Config& config) + : AudioProcessingImpl(config, nullptr) {} + +AudioProcessingImpl::AudioProcessingImpl(const Config& config, + Beamformer* beamformer) : echo_cancellation_(NULL), echo_control_mobile_(NULL), gain_control_(NULL), @@ -181,6 +190,7 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config) #endif transient_suppressor_enabled_(config.Get<ExperimentalNs>().enabled), beamformer_enabled_(config.Get<Beamforming>().enabled), + beamformer_(beamformer), array_geometry_(config.Get<Beamforming>().array_geometry) { echo_cancellation_ = new EchoCancellationImpl(this, crit_); component_list_.push_back(echo_cancellation_); @@ -330,6 +340,11 @@ int AudioProcessingImpl::InitializeLocked(int input_sample_rate_hz, num_reverse_channels > 2 || num_reverse_channels < 1) { return kBadNumberChannelsError; } + if (beamformer_enabled_ && + (static_cast<size_t>(num_input_channels) != array_geometry_.size() || + num_output_channels > 1)) { + return kBadNumberChannelsError; + } fwd_in_format_.set(input_sample_rate_hz, num_input_channels); fwd_out_format_.set(output_sample_rate_hz, num_output_channels); @@ -395,11 +410,6 @@ int AudioProcessingImpl::MaybeInitializeLocked(int input_sample_rate_hz, num_reverse_channels == rev_in_format_.num_channels()) { return kNoError; } - if (beamformer_enabled_ && - (static_cast<size_t>(num_input_channels) != array_geometry_.size() || - num_output_channels > 1)) { - return kBadNumberChannelsError; - } return InitializeLocked(input_sample_rate_hz, output_sample_rate_hz, reverse_sample_rate_hz, @@ -622,7 +632,9 @@ int AudioProcessingImpl::ProcessStreamLocked() { RETURN_ON_ERR(echo_control_mobile_->ProcessCaptureAudio(ca)); RETURN_ON_ERR(voice_detection_->ProcessCaptureAudio(ca)); - if (use_new_agc_ && gain_control_->is_enabled()) { + if (use_new_agc_ && + gain_control_->is_enabled() && + (!beamformer_enabled_ || beamformer_->is_target_present())) { agc_manager_->Process(ca->split_bands_const(0)[kBand0To8kHz], ca->samples_per_split_channel(), split_rate_); @@ -990,9 +1002,10 @@ int AudioProcessingImpl::InitializeTransient() { void AudioProcessingImpl::InitializeBeamformer() { if (beamformer_enabled_) { #ifdef WEBRTC_BEAMFORMER - beamformer_.reset(new Beamformer(kChunkSizeMs, - split_rate_, - array_geometry_)); + if (!beamformer_) { + beamformer_.reset(new Beamformer(array_geometry_)); + } + beamformer_->Initialize(kChunkSizeMs, split_rate_); #else assert(false); #endif diff --git a/webrtc/modules/audio_processing/audio_processing_impl.h b/webrtc/modules/audio_processing/audio_processing_impl.h index
08de122b9..65437fe32 100644 --- a/webrtc/modules/audio_processing/audio_processing_impl.h +++ b/webrtc/modules/audio_processing/audio_processing_impl.h @@ -86,6 +86,8 @@ class AudioFormat : public AudioRate { class AudioProcessingImpl : public AudioProcessing { public: explicit AudioProcessingImpl(const Config& config); + // Only for testing. + AudioProcessingImpl(const Config& config, Beamformer* beamformer); virtual ~AudioProcessingImpl(); // AudioProcessing methods. diff --git a/webrtc/modules/audio_processing/beamformer/beamformer.cc b/webrtc/modules/audio_processing/beamformer/beamformer.cc index f41462eef..d76fa6888 100644 --- a/webrtc/modules/audio_processing/beamformer/beamformer.cc +++ b/webrtc/modules/audio_processing/beamformer/beamformer.cc @@ -27,7 +27,6 @@ const float kAlpha = 1.5f; // The minimum value a postprocessing mask can take. const float kMaskMinimum = 0.01f; -const int kFftSize = 256; const float kSpeedOfSoundMeterSeconds = 340; // For both target and interf angles, 0 is perpendicular to the microphone @@ -47,8 +46,6 @@ const float kInterfAngleRadians = static_cast<float>(M_PI) / 4.f; // Rpsi = Rpsi_angled * kBalance + Rpsi_uniform * (1 - kBalance) const float kBalance = 0.2f; -const int kNumFreqBins = kFftSize / 2 + 1; - // TODO(claguna): need comment here. const float kBeamwidthConstant = 0.00001f; @@ -61,10 +58,6 @@ const float kBoxcarHalfWidth = 0.001f; // that our covariance matrices are positive semidefinite. const float kCovUniformGapHalfWidth = 0.001f; -// How many blocks of past masks (including the current block) we save. Saved -// masks are used for postprocessing such as removing musical noise. -const int kNumberSavedPostfilterMasks = 2; - // Lower bound on gain decay. const float kHalfLifeSeconds = 0.05f; @@ -72,9 +65,15 @@ const float kHalfLifeSeconds = 0.05f; const int kMidFrequnecyLowerBoundHz = 250; const int kMidFrequencyUpperBoundHz = 400; -const int kHighFrequnecyLowerBoundHz = 4000; +const int kHighFrequencyLowerBoundHz = 4000; const int kHighFrequencyUpperBoundHz = 7000; +// Mask threshold over which the data is considered signal and not interference. +const float kMaskTargetThreshold = 0.3f; +// Time in seconds after which the data is considered interference if the mask +// does not pass |kMaskTargetThreshold|. +const float kHoldTargetSeconds = 0.25f; + // Does conjugate(|norm_mat|) * |mat| * transpose(|norm_mat|). No extra space is // used; to accomplish this, we compute both multiplications in the same loop.
float Norm(const ComplexMatrix<float>& mat, @@ -126,46 +125,45 @@ int Round(float x) { } // namespace -Beamformer::Beamformer(int chunk_size_ms, - int sample_rate_hz, - const std::vector<Point>& array_geometry) - : chunk_length_(sample_rate_hz / (1000.f / chunk_size_ms)), - window_(new float[kFftSize]), - num_input_channels_(array_geometry.size()), - sample_rate_hz_(sample_rate_hz), - mic_spacing_(MicSpacingFromGeometry(array_geometry)), - decay_threshold_( - pow(2, (kFftSize / -2.f) / (sample_rate_hz_ * kHalfLifeSeconds))), - mid_frequency_lower_bin_bound_( - Round(kMidFrequnecyLowerBoundHz * kFftSize / sample_rate_hz_)), - mid_frequency_upper_bin_bound_( - Round(kMidFrequencyUpperBoundHz * kFftSize / sample_rate_hz_)), - high_frequency_lower_bin_bound_( - Round(kHighFrequnecyLowerBoundHz * kFftSize / sample_rate_hz_)), - high_frequency_upper_bin_bound_( - Round(kHighFrequencyUpperBoundHz * kFftSize / sample_rate_hz_)), - current_block_ix_(0), - previous_block_ix_(-1), - postfilter_masks_(new MatrixF[kNumberSavedPostfilterMasks]), - delay_sum_masks_(new ComplexMatrixF[kNumFreqBins]), - target_cov_mats_(new ComplexMatrixF[kNumFreqBins]), - interf_cov_mats_(new ComplexMatrixF[kNumFreqBins]), - reflected_interf_cov_mats_(new ComplexMatrixF[kNumFreqBins]), - mask_thresholds_(new float[kNumFreqBins]), - wave_numbers_(new float[kNumFreqBins]), - rxiws_(new float[kNumFreqBins]), - rpsiws_(new float[kNumFreqBins]), - reflected_rpsiws_(new float[kNumFreqBins]) { +Beamformer::Beamformer(const std::vector<Point>& array_geometry) + : num_input_channels_(array_geometry.size()), + mic_spacing_(MicSpacingFromGeometry(array_geometry)) { + + WindowGenerator::KaiserBesselDerived(kAlpha, kFftSize, window_); + + for (int i = 0; i < kNumberSavedPostfilterMasks; ++i) { + postfilter_masks_[i].Resize(1, kNumFreqBins); + } +} + +void Beamformer::Initialize(int chunk_size_ms, int sample_rate_hz) { + chunk_length_ = sample_rate_hz / (1000.f / chunk_size_ms); + sample_rate_hz_ = sample_rate_hz; + decay_threshold_ = + pow(2, (kFftSize / -2.f) / (sample_rate_hz_ * kHalfLifeSeconds)); + mid_frequency_lower_bin_bound_ = + Round(kMidFrequnecyLowerBoundHz * kFftSize / sample_rate_hz_); + mid_frequency_upper_bin_bound_ = + Round(kMidFrequencyUpperBoundHz * kFftSize / sample_rate_hz_); + high_frequency_lower_bin_bound_ = + Round(kHighFrequencyLowerBoundHz * kFftSize / sample_rate_hz_); + high_frequency_upper_bin_bound_ = + Round(kHighFrequencyUpperBoundHz * kFftSize / sample_rate_hz_); + current_block_ix_ = 0; + previous_block_ix_ = -1; + is_target_present_ = false; + hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize; + interference_blocks_count_ = hold_target_blocks_; + DCHECK_LE(mid_frequency_upper_bin_bound_, kNumFreqBins); DCHECK_LT(mid_frequency_lower_bin_bound_, mid_frequency_upper_bin_bound_); DCHECK_LE(high_frequency_upper_bin_bound_, kNumFreqBins); DCHECK_LT(high_frequency_lower_bin_bound_, high_frequency_upper_bin_bound_); - WindowGenerator::KaiserBesselDerived(kAlpha, kFftSize, window_.get()); lapped_transform_.reset(new LappedTransform(num_input_channels_, 1, chunk_length_, - window_.get(), + window_, kFftSize, kFftSize / 2, this)); @@ -196,9 +194,6 @@ Beamformer::Beamformer(int chunk_size_ms, reflected_rpsiws_[i] = Norm(reflected_interf_cov_mats_[i], delay_sum_masks_[i]); } - for (int i = 0; i < kNumberSavedPostfilterMasks; ++i) { - postfilter_masks_[i].Resize(1, kNumFreqBins); - } } void Beamformer::InitDelaySumMasks() { @@ -379,6 +374,8 @@ void Beamformer::ProcessAudioBlock(const complex_f* const* input,
mask_thresholds_[i]); } + EstimateTargetPresence(mask_data, kNumFreqBins); + // Can't access block_index - 1 on the first block. if (previous_block_ix_ >= 0) { ApplyDecay(); @@ -490,4 +487,18 @@ float Beamformer::MicSpacingFromGeometry(const std::vector<Point>& geometry) { return sqrt(mic_spacing); } +void Beamformer::EstimateTargetPresence(float* mask, int length) { + memcpy(sorted_mask_, mask, kNumFreqBins * sizeof(*mask)); + const int median_ix = (length + 1) / 2; + std::nth_element(sorted_mask_, + sorted_mask_ + median_ix, + sorted_mask_ + length); + if (sorted_mask_[median_ix] > kMaskTargetThreshold) { + is_target_present_ = true; + interference_blocks_count_ = 0; + } else { + is_target_present_ = interference_blocks_count_++ < hold_target_blocks_; + } +} + } // namespace webrtc diff --git a/webrtc/modules/audio_processing/beamformer/beamformer.h b/webrtc/modules/audio_processing/beamformer/beamformer.h index d50b684a3..427297b50 100644 --- a/webrtc/modules/audio_processing/beamformer/beamformer.h +++ b/webrtc/modules/audio_processing/beamformer/beamformer.h @@ -29,22 +29,29 @@ class Beamformer : public LappedTransform::Callback { public: // At the moment it only accepts uniform linear microphone arrays. Using the // first microphone as a reference position [0, 0, 0] is a natural choice. - Beamformer(int chunk_size_ms, - // Sample rate corresponds to the lower band. - int sample_rate_hz, - const std::vector<Point>& array_geometry); + explicit Beamformer(const std::vector<Point>& array_geometry); + virtual ~Beamformer() {}; + + // Sample rate corresponds to the lower band. + // Needs to be called before the Beamformer can be used. + virtual void Initialize(int chunk_size_ms, int sample_rate_hz); // Process one time-domain chunk of audio. The audio can be separated into // two signals by frequency, with the higher half passed in as the second // parameter. Use NULL for |high_pass_split_input| if you only have one // audio signal. The number of frames and channels must correspond to the // ctor parameters. The same signal can be passed in as |input| and |output|. - void ProcessChunk(const float* const* input, - const float* const* high_pass_split_input, - int num_input_channels, - int num_frames_per_band, - float* const* output, - float* const* high_pass_split_output); + virtual void ProcessChunk(const float* const* input, + const float* const* high_pass_split_input, + int num_input_channels, + int num_frames_per_band, + float* const* output, + float* const* high_pass_split_output); + // After processing each block |is_target_present_| is set to true if the + // target signal is present and to false otherwise. This method can be + // called to know whether the data is the target signal or interference and + // process it accordingly. + virtual bool is_target_present() { return is_target_present_; } protected: // Process one frequency-domain block of audio.
This is where the fun happens. @@ -53,7 +60,7 @@ class Beamformer : public LappedTransform::Callback { int num_input_channels, int num_freq_bins, int num_output_channels, - complex_f* const* output); + complex_f* const* output) override; private: typedef Matrix<float> MatrixF; @@ -93,23 +100,30 @@ class Beamformer : public LappedTransform::Callback { void ApplyMasks(const complex_f* const* input, complex_f* const* output); float MicSpacingFromGeometry(const std::vector<Point>& array_geometry); + void EstimateTargetPresence(float* mask, int length); + + static const int kFftSize = 256; + static const int kNumFreqBins = kFftSize / 2 + 1; + // How many blocks of past masks (including the current block) we save. Saved + // masks are used for postprocessing such as removing musical noise. + static const int kNumberSavedPostfilterMasks = 2; // Deals with the fft transform and blocking. - const int chunk_length_; + int chunk_length_; scoped_ptr<LappedTransform> lapped_transform_; - scoped_ptr<float[]> window_; + float window_[kFftSize]; // Parameters exposed to the user. const int num_input_channels_; - const int sample_rate_hz_; + int sample_rate_hz_; const float mic_spacing_; // Calculated based on user-input and constants in the .cc file. - const float decay_threshold_; - const int mid_frequency_lower_bin_bound_; - const int mid_frequency_upper_bin_bound_; - const int high_frequency_lower_bin_bound_; - const int high_frequency_upper_bin_bound_; + float decay_threshold_; + int mid_frequency_lower_bin_bound_; + int mid_frequency_upper_bin_bound_; + int high_frequency_lower_bin_bound_; + int high_frequency_upper_bin_bound_; // Indices into |postfilter_masks_|. int current_block_ix_; @@ -117,29 +131,30 @@ // Old masks are saved in this ring buffer for smoothing. Array of length // |kNumberSavedMasks| matrix of size 1 x |kNumFreqBins|. - scoped_ptr<MatrixF[]> postfilter_masks_; + MatrixF postfilter_masks_[kNumberSavedPostfilterMasks]; + float sorted_mask_[kNumFreqBins]; // Array of length |kNumFreqBins|, Matrix of size |1| x |num_channels_|. - scoped_ptr<ComplexMatrixF[]> delay_sum_masks_; + ComplexMatrixF delay_sum_masks_[kNumFreqBins]; // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x // |num_input_channels_|. - scoped_ptr<ComplexMatrixF[]> target_cov_mats_; + ComplexMatrixF target_cov_mats_[kNumFreqBins]; // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x // |num_input_channels_|. - scoped_ptr<ComplexMatrixF[]> interf_cov_mats_; - scoped_ptr<ComplexMatrixF[]> reflected_interf_cov_mats_; + ComplexMatrixF interf_cov_mats_[kNumFreqBins]; + ComplexMatrixF reflected_interf_cov_mats_[kNumFreqBins]; // Of length |kNumFreqBins|. - scoped_ptr<float[]> mask_thresholds_; - scoped_ptr<float[]> wave_numbers_; + float mask_thresholds_[kNumFreqBins]; + float wave_numbers_[kNumFreqBins]; // Preallocated for ProcessAudioBlock() // Of length |kNumFreqBins|. - scoped_ptr<float[]> rxiws_; - scoped_ptr<float[]> rpsiws_; - scoped_ptr<float[]> reflected_rpsiws_; + float rxiws_[kNumFreqBins]; + float rpsiws_[kNumFreqBins]; + float reflected_rpsiws_[kNumFreqBins]; // The microphone normalization factor. ComplexMatrixF eig_m_; @@ -148,6 +163,14 @@ bool high_pass_exists_; int num_blocks_in_this_chunk_; float high_pass_postfilter_mask_; + + // True when the target signal is present. + bool is_target_present_; + // Number of blocks after which the data is considered interference if the + // mask does not pass |kMaskTargetThreshold|. + int hold_target_blocks_; + // Number of blocks since the last mask that passed |kMaskTargetThreshold|.
+ int interference_blocks_count_; }; } // namespace webrtc diff --git a/webrtc/modules/audio_processing/beamformer/beamformer_test.cc b/webrtc/modules/audio_processing/beamformer/beamformer_test.cc index e20c3a9a2..74e845823 100644 --- a/webrtc/modules/audio_processing/beamformer/beamformer_test.cc +++ b/webrtc/modules/audio_processing/beamformer/beamformer_test.cc @@ -59,9 +59,8 @@ int main(int argc, char* argv[]) { for (int i = 0; i < FLAGS_num_input_channels; ++i) { array_geometry.push_back(webrtc::Point(i * FLAGS_mic_spacing, 0.f, 0.f)); } - webrtc::Beamformer bf(kChunkTimeMilliseconds, - FLAGS_sample_rate, - array_geometry); + webrtc::Beamformer bf(array_geometry); + bf.Initialize(kChunkTimeMilliseconds, FLAGS_sample_rate); while (true) { size_t samples_read = webrtc::PcmReadToFloat(read_file, kInputSamplesPerChunk, diff --git a/webrtc/modules/audio_processing/beamformer/mock_beamformer.cc b/webrtc/modules/audio_processing/beamformer/mock_beamformer.cc new file mode 100644 index 000000000..2319c32fd --- /dev/null +++ b/webrtc/modules/audio_processing/beamformer/mock_beamformer.cc @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_processing/beamformer/mock_beamformer.h" + +#include <vector> + +namespace webrtc { + +MockBeamformer::MockBeamformer(const std::vector<Point>& array_geometry) + : Beamformer(array_geometry) {} + +MockBeamformer::~MockBeamformer() {} + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/beamformer/mock_beamformer.h b/webrtc/modules/audio_processing/beamformer/mock_beamformer.h new file mode 100644 index 000000000..2c04a12ae --- /dev/null +++ b/webrtc/modules/audio_processing/beamformer/mock_beamformer.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MOCK_BEAMFORMER_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MOCK_BEAMFORMER_H_ + +#include <vector> + +#include "testing/gmock/include/gmock/gmock.h" +#include "webrtc/modules/audio_processing/beamformer/beamformer.h" + +namespace webrtc { + +class MockBeamformer : public Beamformer { + public: + explicit MockBeamformer(const std::vector<Point>& array_geometry); + ~MockBeamformer() override; + + MOCK_METHOD2(Initialize, void(int chunk_size_ms, int sample_rate_hz)); + MOCK_METHOD6(ProcessChunk, void(const float* const* input, + const float* const* high_pass_split_input, + int num_input_channels, + int num_frames_per_band, + float* const* output, + float* const* high_pass_split_output)); + MOCK_METHOD0(is_target_present, bool()); +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MOCK_BEAMFORMER_H_ diff --git a/webrtc/modules/audio_processing/include/audio_processing.h b/webrtc/modules/audio_processing/include/audio_processing.h index d23a9ae42..6b761e14b 100644 --- a/webrtc/modules/audio_processing/include/audio_processing.h +++ b/webrtc/modules/audio_processing/include/audio_processing.h @@ -24,6 +24,7 @@ struct AecCore; namespace webrtc { class AudioFrame; +class Beamformer; class EchoCancellation; class EchoControlMobile; class GainControl; @@ -199,6 +200,8 @@ class AudioProcessing { static AudioProcessing* Create(); // Allows passing in an optional configuration at create-time. static AudioProcessing* Create(const Config& config); + // Only for testing. + static AudioProcessing* Create(const Config& config, Beamformer* beamformer); virtual ~AudioProcessing() {} // Initializes internal states, while retaining all user settings. This diff --git a/webrtc/modules/audio_processing/test/audio_processing_unittest.cc b/webrtc/modules/audio_processing/test/audio_processing_unittest.cc index 217ffaef0..931169e84 100644 --- a/webrtc/modules/audio_processing/test/audio_processing_unittest.cc +++ b/webrtc/modules/audio_processing/test/audio_processing_unittest.cc @@ -18,6 +18,7 @@ #include "webrtc/common_audio/resampler/include/push_resampler.h" #include "webrtc/common_audio/resampler/push_sinc_resampler.h" #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" +#include "webrtc/modules/audio_processing/beamformer/mock_beamformer.h" #include "webrtc/modules/audio_processing/common.h" #include "webrtc/modules/audio_processing/include/audio_processing.h" #include "webrtc/modules/audio_processing/test/test_utils.h" @@ -278,6 +279,35 @@ void OpenFileAndReadMessage(const std::string filename, fclose(file); } +// Reads a 10 ms chunk of int16 interleaved audio from the given (assumed +// stereo) file, converts to deinterleaved float (optionally downmixing) and +// returns the result in |cb|. Returns false if the file ended (or on error) and +// true otherwise. +// +// |int_data| and |float_data| are just temporary space that must be +// sufficiently large to hold the 10 ms chunk. +bool ReadChunk(FILE* file, int16_t* int_data, float* float_data, + ChannelBuffer<float>* cb) { + // The files always contain stereo audio. + size_t frame_size = cb->samples_per_channel() * 2; + size_t read_count = fread(int_data, sizeof(int16_t), frame_size, file); + if (read_count != frame_size) { + // Check that the file really ended. + assert(feof(file)); + return false; // This is expected.
+ } + + S16ToFloat(int_data, frame_size, float_data); + if (cb->num_channels() == 1) { + MixStereoToMono(float_data, cb->data(), cb->samples_per_channel()); + } else { + Deinterleave(float_data, cb->samples_per_channel(), 2, + cb->channels()); + } + + return true; +} + class ApmTest : public ::testing::Test { protected: ApmTest(); @@ -1164,6 +1194,87 @@ TEST_F(ApmTest, ManualVolumeChangeIsPossible) { } } +#if !defined(WEBRTC_ANDROID) && !defined(WEBRTC_IOS) +TEST_F(ApmTest, AgcOnlyAdaptsWhenTargetSignalIsPresent) { + const int kSampleRateHz = 16000; + const int kSamplesPerChannel = + AudioProcessing::kChunkSizeMs * kSampleRateHz / 1000; + const int kNumInputChannels = 2; + const int kNumOutputChannels = 1; + const int kNumChunks = 700; + const float kScaleFactor = 0.25f; + Config config; + std::vector<webrtc::Point> geometry; + geometry.push_back(webrtc::Point(0.f, 0.f, 0.f)); + geometry.push_back(webrtc::Point(0.05f, 0.f, 0.f)); + config.Set<Beamforming>(new Beamforming(true, geometry)); + testing::NiceMock<MockBeamformer>* beamformer = + new testing::NiceMock<MockBeamformer>(geometry); + scoped_ptr<AudioProcessing> apm(AudioProcessing::Create(config, beamformer)); + EXPECT_EQ(kNoErr, apm->gain_control()->Enable(true)); + ChannelBuffer<float> src_buf(kSamplesPerChannel, kNumInputChannels); + ChannelBuffer<float> dest_buf(kSamplesPerChannel, kNumOutputChannels); + const int max_length = kSamplesPerChannel * std::max(kNumInputChannels, + kNumOutputChannels); + scoped_ptr<int16_t[]> int_data(new int16_t[max_length]); + scoped_ptr<float[]> float_data(new float[max_length]); + std::string filename = ResourceFilePath("far", kSampleRateHz); + FILE* far_file = fopen(filename.c_str(), "rb"); + ASSERT_TRUE(far_file != NULL) << "Could not open file " << filename << "\n"; + const int kDefaultVolume = apm->gain_control()->stream_analog_level(); + const int kDefaultCompressionGain = + apm->gain_control()->compression_gain_db(); + bool is_target = false; + EXPECT_CALL(*beamformer, is_target_present()) + .WillRepeatedly(testing::ReturnPointee(&is_target)); + for (int i = 0; i < kNumChunks; ++i) { + ASSERT_TRUE(ReadChunk(far_file, + int_data.get(), + float_data.get(), + &src_buf)); + for (int j = 0; j < kNumInputChannels * kSamplesPerChannel; ++j) { + src_buf.data()[j] *= kScaleFactor; + } + EXPECT_EQ(kNoErr, + apm->ProcessStream(src_buf.channels(), + src_buf.samples_per_channel(), + kSampleRateHz, + LayoutFromChannels(src_buf.num_channels()), + kSampleRateHz, + LayoutFromChannels(dest_buf.num_channels()), + dest_buf.channels())); + } + EXPECT_EQ(kDefaultVolume, + apm->gain_control()->stream_analog_level()); + EXPECT_EQ(kDefaultCompressionGain, + apm->gain_control()->compression_gain_db()); + rewind(far_file); + is_target = true; + for (int i = 0; i < kNumChunks; ++i) { + ASSERT_TRUE(ReadChunk(far_file, + int_data.get(), + float_data.get(), + &src_buf)); + for (int j = 0; j < kNumInputChannels * kSamplesPerChannel; ++j) { + src_buf.data()[j] *= kScaleFactor; + } + EXPECT_EQ(kNoErr, + apm->ProcessStream(src_buf.channels(), + src_buf.samples_per_channel(), + kSampleRateHz, + LayoutFromChannels(src_buf.num_channels()), + kSampleRateHz, + LayoutFromChannels(dest_buf.num_channels()), + dest_buf.channels())); + } + EXPECT_LT(kDefaultVolume, + apm->gain_control()->stream_analog_level()); + EXPECT_LT(kDefaultCompressionGain, + apm->gain_control()->compression_gain_db()); + ASSERT_EQ(0, fclose(far_file)); +} +#endif + TEST_F(ApmTest, NoiseSuppression) { // Test valid suppression levels.
NoiseSuppression::Level level[] = { @@ -2031,35 +2142,6 @@ TEST_F(ApmTest, NoErrorsWithKeyboardChannel) { } } -// Reads a 10 ms chunk of int16 interleaved audio from the given (assumed -// stereo) file, converts to deinterleaved float (optionally downmixing) and -// returns the result in |cb|. Returns false if the file ended (or on error) and -// true otherwise. -// -// |int_data| and |float_data| are just temporary space that must be -// sufficiently large to hold the 10 ms chunk. -bool ReadChunk(FILE* file, int16_t* int_data, float* float_data, - ChannelBuffer<float>* cb) { - // The files always contain stereo audio. - size_t frame_size = cb->samples_per_channel() * 2; - size_t read_count = fread(int_data, sizeof(int16_t), frame_size, file); - if (read_count != frame_size) { - // Check that the file really ended. - assert(feof(file)); - return false; // This is expected. - } - - S16ToFloat(int_data, frame_size, float_data); - if (cb->num_channels() == 1) { - MixStereoToMono(float_data, cb->data(), cb->samples_per_channel()); - } else { - Deinterleave(float_data, cb->samples_per_channel(), 2, - cb->channels()); - } - - return true; -} - // Compares the reference and test arrays over a region around the expected // delay. Finds the highest SNR in that region and adds the variance and squared // error results to the supplied accumulators. diff --git a/webrtc/modules/modules.gyp b/webrtc/modules/modules.gyp index 92a7721b7..1477d4349 100644 --- a/webrtc/modules/modules.gyp +++ b/webrtc/modules/modules.gyp @@ -180,6 +180,8 @@ 'audio_processing/beamformer/complex_matrix_unittest.cc', 'audio_processing/beamformer/covariance_matrix_generator_unittest.cc', 'audio_processing/beamformer/matrix_unittest.cc', + 'audio_processing/beamformer/mock_beamformer.cc', + 'audio_processing/beamformer/mock_beamformer.h', 'audio_processing/beamformer/pcm_utils.cc', 'audio_processing/beamformer/pcm_utils.h', 'audio_processing/echo_cancellation_impl_unittest.cc',
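
A condensed sketch of the injection pattern the new test-only factory enables, for quick reference. It assumes the same headers and namespace context as audio_processing_unittest.cc above; the geometry values are arbitrary. Note that AudioProcessingImpl stores the injected pointer in a scoped_ptr, so |apm| takes ownership of |beamformer|, but the raw pointer stays usable for setting gmock expectations while |apm| is alive:

// Sketch only: requires gmock/gtest plus the unittest's includes.
std::vector<webrtc::Point> geometry;
geometry.push_back(webrtc::Point(0.f, 0.f, 0.f));
geometry.push_back(webrtc::Point(0.05f, 0.f, 0.f));

Config config;
config.Set<Beamforming>(new Beamforming(true, geometry));

// NiceMock silences "uninteresting call" warnings for Initialize() and
// ProcessChunk(), which this sketch does not set expectations on.
testing::NiceMock<MockBeamformer>* beamformer =
    new testing::NiceMock<MockBeamformer>(geometry);
EXPECT_CALL(*beamformer, is_target_present())
    .WillRepeatedly(testing::Return(false));  // AGC should then never adapt.

// Takes ownership of |beamformer|.
scoped_ptr<AudioProcessing> apm(AudioProcessing::Create(config, beamformer));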
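
A note on the detection logic added in Beamformer::EstimateTargetPresence(): each block is classified by the median of the postfilter mask (found with std::nth_element), and a hysteresis counter keeps |is_target_present_| true for |hold_target_blocks_| blocks after the last mask whose median passed |kMaskTargetThreshold|. Below is a standalone sketch of the same scheme that compiles outside WebRTC; the class name and the main() driver are hypothetical, while the constants and member names mirror the patch:

#include <algorithm>
#include <cstring>

namespace {

const int kNumFreqBins = 129;             // kFftSize / 2 + 1 in the patch.
const float kMaskTargetThreshold = 0.3f;  // Same value as the patch.
const int kHoldTargetBlocks = 31;         // kHoldTargetSeconds at 16 kHz.

// Hypothetical stand-in for the presence-estimation part of Beamformer.
class TargetPresenceEstimator {
 public:
  TargetPresenceEstimator()
      : is_target_present_(false),
        interference_blocks_count_(kHoldTargetBlocks) {}

  // Called once per frequency-domain block with the postfilter mask.
  void Update(const float* mask, int length) {
    std::memcpy(sorted_mask_, mask, length * sizeof(*mask));
    const int median_ix = (length + 1) / 2;
    // Partial sort: only the element at |median_ix| ends up in sorted order.
    std::nth_element(sorted_mask_, sorted_mask_ + median_ix,
                     sorted_mask_ + length);
    if (sorted_mask_[median_ix] > kMaskTargetThreshold) {
      is_target_present_ = true;
      interference_blocks_count_ = 0;
    } else {
      // Hold the "target present" decision for kHoldTargetBlocks blocks
      // before declaring the input to be interference.
      is_target_present_ = interference_blocks_count_++ < kHoldTargetBlocks;
    }
  }

  bool is_target_present() const { return is_target_present_; }

 private:
  float sorted_mask_[kNumFreqBins];
  bool is_target_present_;
  int interference_blocks_count_;
};

}  // namespace

int main() {
  TargetPresenceEstimator estimator;
  float mask[kNumFreqBins];
  std::fill(mask, mask + kNumFreqBins, 0.9f);  // Mask median above threshold.
  estimator.Update(mask, kNumFreqBins);
  return estimator.is_target_present() ? 0 : 1;  // Returns 0: target present.
}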
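
The per-block constants computed in Beamformer::Initialize() can be sanity-checked in isolation. Blocks advance by kFftSize / 2 samples (50% overlap), so at an assumed 16 kHz lower-band rate a block arrives every 8 ms and the 0.25 s hold time truncates to 31 blocks; the decay value is the per-block factor corresponding to a kHalfLifeSeconds half-life. A small self-contained check, with the 16 kHz rate being an assumption rather than anything fixed by the patch:

#include <cmath>
#include <cstdio>

int main() {
  const int kFftSize = 256;                // Same as the patch.
  const float kHoldTargetSeconds = 0.25f;  // Same as the patch.
  const float kHalfLifeSeconds = 0.05f;    // Same as the patch.
  const int sample_rate_hz = 16000;        // Assumed lower-band rate.

  // Same expression as Initialize(): 0.25 * 2 * 16000 / 256 = 31.25 -> 31.
  const int hold_target_blocks =
      kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize;

  // Per-block factor that halves the mask every kHalfLifeSeconds; the patch
  // uses it as |decay_threshold_|.
  const float decay_threshold =
      std::pow(2.f, (kFftSize / -2.f) / (sample_rate_hz * kHalfLifeSeconds));

  std::printf("hold_target_blocks = %d\n", hold_target_blocks);  // 31
  std::printf("decay_threshold = %.3f\n", decay_threshold);      // ~0.895
  return 0;
}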