Only adapt AGC when the desired signal is present

Take the 50% quantile of the mask and compare it to a certain threshold to determine if the desired signal is present. A hold is applied to avoid fast switching between states. is_signal_present_ has been plotted and looks as expected. The AGC adaptation sounds promising, especially for the cases when the speaker fades in and out from the beam direction. R=andrew@webrtc.org Review URL: https://webrtc-codereview.appspot.com/28329005 git-svn-id: http://webrtc.googlecode.com/svn/trunk@8078 4adac7df-926f-26a2-2b94-8c16560cd09d
2015-01-15 18:07:21 +00:00 · 2015-01-15 18:07:21 +00:00 · d82f55d2a7
commit d82f55d2a7
parent 3e42a8a56a
10 changed files with 310 additions and 115 deletions
--- a/webrtc/modules/audio_processing/audio_processing_impl.cc
+++ b/webrtc/modules/audio_processing/audio_processing_impl.cc
@ -137,11 +137,16 @@ class GainControlForNewAgc : public GainControl, public VolumeCallbacks {

 AudioProcessing* AudioProcessing::Create() {
  Config config;
-  return Create(config);
+  return Create(config, nullptr);
 }

 AudioProcessing* AudioProcessing::Create(const Config& config) {
-  AudioProcessingImpl* apm = new AudioProcessingImpl(config);
+  return Create(config, nullptr);
+}
+
+AudioProcessing* AudioProcessing::Create(const Config& config,
+                                         Beamformer* beamformer) {
+  AudioProcessingImpl* apm = new AudioProcessingImpl(config, beamformer);
  if (apm->Initialize() != kNoError) {
    delete apm;
    apm = NULL;
@ -151,6 +156,10 @@ AudioProcessing* AudioProcessing::Create(const Config& config) {
 }

 AudioProcessingImpl::AudioProcessingImpl(const Config& config)
+    : AudioProcessingImpl(config, nullptr) {}
+
+AudioProcessingImpl::AudioProcessingImpl(const Config& config,
+                                         Beamformer* beamformer)
    : echo_cancellation_(NULL),
      echo_control_mobile_(NULL),
      gain_control_(NULL),
@ -181,6 +190,7 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config)
 #endif
      transient_suppressor_enabled_(config.Get<ExperimentalNs>().enabled),
      beamformer_enabled_(config.Get<Beamforming>().enabled),
+      beamformer_(beamformer),
      array_geometry_(config.Get<Beamforming>().array_geometry) {
  echo_cancellation_ = new EchoCancellationImpl(this, crit_);
  component_list_.push_back(echo_cancellation_);
@ -330,6 +340,11 @@ int AudioProcessingImpl::InitializeLocked(int input_sample_rate_hz,
      num_reverse_channels > 2 || num_reverse_channels < 1) {
    return kBadNumberChannelsError;
  }
+  if (beamformer_enabled_ &&
+      (static_cast<size_t>(num_input_channels) != array_geometry_.size() ||
+       num_output_channels > 1)) {
+    return kBadNumberChannelsError;
+  }

  fwd_in_format_.set(input_sample_rate_hz, num_input_channels);
  fwd_out_format_.set(output_sample_rate_hz, num_output_channels);
@ -395,11 +410,6 @@ int AudioProcessingImpl::MaybeInitializeLocked(int input_sample_rate_hz,
      num_reverse_channels == rev_in_format_.num_channels()) {
    return kNoError;
  }
-  if (beamformer_enabled_ &&
-      (static_cast<size_t>(num_input_channels) != array_geometry_.size() ||
-       num_output_channels > 1)) {
-    return kBadNumberChannelsError;
-  }
  return InitializeLocked(input_sample_rate_hz,
                          output_sample_rate_hz,
                          reverse_sample_rate_hz,
@ -622,7 +632,9 @@ int AudioProcessingImpl::ProcessStreamLocked() {
  RETURN_ON_ERR(echo_control_mobile_->ProcessCaptureAudio(ca));
  RETURN_ON_ERR(voice_detection_->ProcessCaptureAudio(ca));

-  if (use_new_agc_ && gain_control_->is_enabled()) {
+  if (use_new_agc_ &&
+      gain_control_->is_enabled() &&
+      (!beamformer_enabled_ || beamformer_->is_target_present())) {
    agc_manager_->Process(ca->split_bands_const(0)[kBand0To8kHz],
                          ca->samples_per_split_channel(),
                          split_rate_);
@ -990,9 +1002,10 @@ int AudioProcessingImpl::InitializeTransient() {
 void AudioProcessingImpl::InitializeBeamformer() {
  if (beamformer_enabled_) {
 #ifdef WEBRTC_BEAMFORMER
-    beamformer_.reset(new Beamformer(kChunkSizeMs,
-                                     split_rate_,
-                                     array_geometry_));
+    if (!beamformer_) {
+      beamformer_.reset(new Beamformer(array_geometry_));
+    }
+    beamformer_->Initialize(kChunkSizeMs, split_rate_);
 #else
    assert(false);
 #endif
--- a/webrtc/modules/audio_processing/audio_processing_impl.h
+++ b/webrtc/modules/audio_processing/audio_processing_impl.h
@ -86,6 +86,8 @@ class AudioFormat : public AudioRate {
 class AudioProcessingImpl : public AudioProcessing {
 public:
  explicit AudioProcessingImpl(const Config& config);
+  // Only for testing.
+  AudioProcessingImpl(const Config& config, Beamformer* beamformer);
  virtual ~AudioProcessingImpl();

  // AudioProcessing methods.
--- a/webrtc/modules/audio_processing/beamformer/beamformer.cc
+++ b/webrtc/modules/audio_processing/beamformer/beamformer.cc
@ -27,7 +27,6 @@ const float kAlpha = 1.5f;
 // The minimum value a postprocessing mask can take.
 const float kMaskMinimum = 0.01f;

-const int kFftSize = 256;
 const float kSpeedOfSoundMeterSeconds = 340;

 // For both target and interf angles, 0 is perpendicular to the microphone
@ -47,8 +46,6 @@ const float kInterfAngleRadians = static_cast<float>(M_PI) / 4.f;
 // Rpsi = Rpsi_angled * kBalance + Rpsi_uniform * (1 - kBalance)
 const float kBalance = 0.2f;

-const int kNumFreqBins = kFftSize / 2 + 1;
-
 // TODO(claguna): need comment here.
 const float kBeamwidthConstant = 0.00001f;

@ -61,10 +58,6 @@ const float kBoxcarHalfWidth = 0.001f;
 // that our covariance matrices are positive semidefinite.
 const float kCovUniformGapHalfWidth = 0.001f;

-// How many blocks of past masks (including the current block) we save. Saved
-// masks are used for postprocessing such as removing musical noise.
-const int kNumberSavedPostfilterMasks = 2;
-
 // Lower bound on gain decay.
 const float kHalfLifeSeconds = 0.05f;

@ -72,9 +65,15 @@ const float kHalfLifeSeconds = 0.05f;
 const int kMidFrequnecyLowerBoundHz = 250;
 const int kMidFrequencyUpperBoundHz = 400;

-const int kHighFrequnecyLowerBoundHz = 4000;
+const int kHighFrequencyLowerBoundHz = 4000;
 const int kHighFrequencyUpperBoundHz = 7000;

+// Mask threshold over which the data is considered signal and not interference.
+const float kMaskTargetThreshold = 0.3f;
+// Time in seconds after which the data is considered interference if the mask
+// does not pass |kMaskTargetThreshold|.
+const float kHoldTargetSeconds = 0.25f;
+
 // Does conjugate(|norm_mat|) * |mat| * transpose(|norm_mat|). No extra space is
 // used; to accomplish this, we compute both multiplications in the same loop.
 float Norm(const ComplexMatrix<float>& mat,
@ -126,46 +125,45 @@ int Round(float x) {

 }  // namespace

-Beamformer::Beamformer(int chunk_size_ms,
-                       int sample_rate_hz,
-                       const std::vector<Point>& array_geometry)
-    : chunk_length_(sample_rate_hz / (1000.f / chunk_size_ms)),
-      window_(new float[kFftSize]),
-      num_input_channels_(array_geometry.size()),
-      sample_rate_hz_(sample_rate_hz),
-      mic_spacing_(MicSpacingFromGeometry(array_geometry)),
-      decay_threshold_(
-          pow(2, (kFftSize / -2.f) / (sample_rate_hz_ * kHalfLifeSeconds))),
-      mid_frequency_lower_bin_bound_(
-          Round(kMidFrequnecyLowerBoundHz * kFftSize / sample_rate_hz_)),
-      mid_frequency_upper_bin_bound_(
-          Round(kMidFrequencyUpperBoundHz * kFftSize / sample_rate_hz_)),
-      high_frequency_lower_bin_bound_(
-          Round(kHighFrequnecyLowerBoundHz * kFftSize / sample_rate_hz_)),
-      high_frequency_upper_bin_bound_(
-          Round(kHighFrequencyUpperBoundHz * kFftSize / sample_rate_hz_)),
-      current_block_ix_(0),
-      previous_block_ix_(-1),
-      postfilter_masks_(new MatrixF[kNumberSavedPostfilterMasks]),
-      delay_sum_masks_(new ComplexMatrixF[kNumFreqBins]),
-      target_cov_mats_(new ComplexMatrixF[kNumFreqBins]),
-      interf_cov_mats_(new ComplexMatrixF[kNumFreqBins]),
-      reflected_interf_cov_mats_(new ComplexMatrixF[kNumFreqBins]),
-      mask_thresholds_(new float[kNumFreqBins]),
-      wave_numbers_(new float[kNumFreqBins]),
-      rxiws_(new float[kNumFreqBins]),
-      rpsiws_(new float[kNumFreqBins]),
-      reflected_rpsiws_(new float[kNumFreqBins]) {
+Beamformer::Beamformer(const std::vector<Point>& array_geometry)
+    : num_input_channels_(array_geometry.size()),
+      mic_spacing_(MicSpacingFromGeometry(array_geometry)) {
+
+  WindowGenerator::KaiserBesselDerived(kAlpha, kFftSize, window_);
+
+  for (int i = 0; i < kNumberSavedPostfilterMasks; ++i) {
+    postfilter_masks_[i].Resize(1, kNumFreqBins);
+  }
+}
+
+void Beamformer::Initialize(int chunk_size_ms, int sample_rate_hz) {
+  chunk_length_ = sample_rate_hz / (1000.f / chunk_size_ms);
+  sample_rate_hz_ = sample_rate_hz;
+  decay_threshold_ =
+      pow(2, (kFftSize / -2.f) / (sample_rate_hz_ * kHalfLifeSeconds));
+  mid_frequency_lower_bin_bound_ =
+      Round(kMidFrequnecyLowerBoundHz * kFftSize / sample_rate_hz_);
+  mid_frequency_upper_bin_bound_ =
+      Round(kMidFrequencyUpperBoundHz * kFftSize / sample_rate_hz_);
+  high_frequency_lower_bin_bound_ =
+      Round(kHighFrequencyLowerBoundHz * kFftSize / sample_rate_hz_);
+  high_frequency_upper_bin_bound_ =
+      Round(kHighFrequencyUpperBoundHz * kFftSize / sample_rate_hz_);
+  current_block_ix_ = 0;
+  previous_block_ix_ = -1;
+  is_target_present_ = false;
+  hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize;
+  interference_blocks_count_ = hold_target_blocks_;
+
  DCHECK_LE(mid_frequency_upper_bin_bound_, kNumFreqBins);
  DCHECK_LT(mid_frequency_lower_bin_bound_, mid_frequency_upper_bin_bound_);
  DCHECK_LE(high_frequency_upper_bin_bound_, kNumFreqBins);
  DCHECK_LT(high_frequency_lower_bin_bound_, high_frequency_upper_bin_bound_);

-  WindowGenerator::KaiserBesselDerived(kAlpha, kFftSize, window_.get());
  lapped_transform_.reset(new LappedTransform(num_input_channels_,
                                              1,
                                              chunk_length_,
-                                              window_.get(),
+                                              window_,
                                              kFftSize,
                                              kFftSize / 2,
                                              this));
@ -196,9 +194,6 @@ Beamformer::Beamformer(int chunk_size_ms,
    reflected_rpsiws_[i] =
        Norm(reflected_interf_cov_mats_[i], delay_sum_masks_[i]);
  }
-  for (int i = 0; i < kNumberSavedPostfilterMasks; ++i) {
-    postfilter_masks_[i].Resize(1, kNumFreqBins);
-  }
 }

 void Beamformer::InitDelaySumMasks() {
@ -379,6 +374,8 @@ void Beamformer::ProcessAudioBlock(const complex_f* const* input,
                                            mask_thresholds_[i]);
  }

+  EstimateTargetPresence(mask_data, kNumFreqBins);
+
  // Can't access block_index - 1 on the first block.
  if (previous_block_ix_ >= 0) {
    ApplyDecay();
@ -490,4 +487,18 @@ float Beamformer::MicSpacingFromGeometry(const std::vector<Point>& geometry) {
  return sqrt(mic_spacing);
 }

+void Beamformer::EstimateTargetPresence(float* mask, int length) {
+  memcpy(sorted_mask_, mask, kNumFreqBins * sizeof(*mask));
+  const int median_ix = (length + 1) / 2;
+  std::nth_element(sorted_mask_,
+                   sorted_mask_ + median_ix,
+                   sorted_mask_ + length);
+  if (sorted_mask_[median_ix] > kMaskTargetThreshold) {
+    is_target_present_ = true;
+    interference_blocks_count_ = 0;
+  } else {
+    is_target_present_ = interference_blocks_count_++ < hold_target_blocks_;
+  }
+}
+
 }  // namespace webrtc
--- a/webrtc/modules/audio_processing/beamformer/beamformer.h
+++ b/webrtc/modules/audio_processing/beamformer/beamformer.h
@ -29,22 +29,29 @@ class Beamformer : public LappedTransform::Callback {
 public:
  // At the moment it only accepts uniform linear microphone arrays. Using the
  // first microphone as a reference position [0, 0, 0] is a natural choice.
-  Beamformer(int chunk_size_ms,
-             // Sample rate corresponds to the lower band.
-             int sample_rate_hz,
-             const std::vector<Point>& array_geometry);
+  explicit Beamformer(const std::vector<Point>& array_geometry);
+  virtual ~Beamformer() {};
+
+  // Sample rate corresponds to the lower band.
+  // Needs to be called before the Beamformer can be used.
+  virtual void Initialize(int chunk_size_ms, int sample_rate_hz);

  // Process one time-domain chunk of audio. The audio can be separated into
  // two signals by frequency, with the higher half passed in as the second
  // parameter. Use NULL for |high_pass_split_input| if you only have one
  // audio signal. The number of frames and channels must correspond to the
  // ctor parameters. The same signal can be passed in as |input| and |output|.
-  void ProcessChunk(const float* const* input,
-                    const float* const* high_pass_split_input,
-                    int num_input_channels,
-                    int num_frames_per_band,
-                    float* const* output,
-                    float* const* high_pass_split_output);
+  virtual void ProcessChunk(const float* const* input,
+                            const float* const* high_pass_split_input,
+                            int num_input_channels,
+                            int num_frames_per_band,
+                            float* const* output,
+                            float* const* high_pass_split_output);
+  // After processing each block |is_target_present_| is set to true if the
+  // target signal is present and to false otherwise. This method can be called
+  // to know if the data is target signal or interference and process it
+  // accordingly.
+  virtual bool is_target_present() { return is_target_present_; }

 protected:
  // Process one frequency-domain block of audio. This is where the fun
@ -53,7 +60,7 @@ class Beamformer : public LappedTransform::Callback {
                         int num_input_channels,
                         int num_freq_bins,
                         int num_output_channels,
-                         complex<float>* const* output);
+                         complex<float>* const* output) override;

 private:
  typedef Matrix<float> MatrixF;
@ -93,23 +100,30 @@ class Beamformer : public LappedTransform::Callback {
  void ApplyMasks(const complex_f* const* input, complex_f* const* output);

  float MicSpacingFromGeometry(const std::vector<Point>& array_geometry);
+  void EstimateTargetPresence(float* mask, int length);
+
+  static const int kFftSize = 256;
+  static const int kNumFreqBins = kFftSize / 2 + 1;
+  // How many blocks of past masks (including the current block) we save. Saved
+  // masks are used for postprocessing such as removing musical noise.
+  static const int kNumberSavedPostfilterMasks = 2;

  // Deals with the fft transform and blocking.
-  const int chunk_length_;
+  int chunk_length_;
  scoped_ptr<LappedTransform> lapped_transform_;
-  scoped_ptr<float[]> window_;
+  float window_[kFftSize];

  // Parameters exposed to the user.
  const int num_input_channels_;
-  const int sample_rate_hz_;
+  int sample_rate_hz_;
  const float mic_spacing_;

  // Calculated based on user-input and constants in the .cc file.
-  const float decay_threshold_;
-  const int mid_frequency_lower_bin_bound_;
-  const int mid_frequency_upper_bin_bound_;
-  const int high_frequency_lower_bin_bound_;
-  const int high_frequency_upper_bin_bound_;
+  float decay_threshold_;
+  int mid_frequency_lower_bin_bound_;
+  int mid_frequency_upper_bin_bound_;
+  int high_frequency_lower_bin_bound_;
+  int high_frequency_upper_bin_bound_;

  // Indices into |postfilter_masks_|.
  int current_block_ix_;
@ -117,29 +131,30 @@ class Beamformer : public LappedTransform::Callback {

  // Old masks are saved in this ring buffer for smoothing. Array of length
  // |kNumberSavedMasks| matrix of size 1 x |kNumFreqBins|.
-  scoped_ptr<MatrixF[]> postfilter_masks_;
+  MatrixF postfilter_masks_[kNumberSavedPostfilterMasks];
+  float sorted_mask_[kNumFreqBins];

  // Array of length |kNumFreqBins|, Matrix of size |1| x |num_channels_|.
-  scoped_ptr<ComplexMatrixF[]> delay_sum_masks_;
+  ComplexMatrixF delay_sum_masks_[kNumFreqBins];

  // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x
  // |num_input_channels_|.
-  scoped_ptr<ComplexMatrixF[]> target_cov_mats_;
+  ComplexMatrixF target_cov_mats_[kNumFreqBins];

  // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x
  // |num_input_channels_|.
-  scoped_ptr<ComplexMatrixF[]> interf_cov_mats_;
-  scoped_ptr<ComplexMatrixF[]> reflected_interf_cov_mats_;
+  ComplexMatrixF interf_cov_mats_[kNumFreqBins];
+  ComplexMatrixF reflected_interf_cov_mats_[kNumFreqBins];

  // Of length |kNumFreqBins|.
-  scoped_ptr<float[]> mask_thresholds_;
-  scoped_ptr<float[]> wave_numbers_;
+  float mask_thresholds_[kNumFreqBins];
+  float wave_numbers_[kNumFreqBins];

  // Preallocated for ProcessAudioBlock()
  // Of length |kNumFreqBins|.
-  scoped_ptr<float[]> rxiws_;
-  scoped_ptr<float[]> rpsiws_;
-  scoped_ptr<float[]> reflected_rpsiws_;
+  float rxiws_[kNumFreqBins];
+  float rpsiws_[kNumFreqBins];
+  float reflected_rpsiws_[kNumFreqBins];

  // The microphone normalization factor.
  ComplexMatrixF eig_m_;
@ -148,6 +163,14 @@ class Beamformer : public LappedTransform::Callback {
  bool high_pass_exists_;
  int num_blocks_in_this_chunk_;
  float high_pass_postfilter_mask_;
+
+  // True when the target signal is present.
+  bool is_target_present_;
+  // Number of blocks after which the data is considered interference if the
+  // mask does not pass |kMaskTargetThreshold|.
+  int hold_target_blocks_;
+  // Number of blocks since the last mask that passed |kMaskTargetThreshold|.
+  int interference_blocks_count_;
 };

 }  // namespace webrtc
--- a/webrtc/modules/audio_processing/beamformer/beamformer_test.cc
+++ b/webrtc/modules/audio_processing/beamformer/beamformer_test.cc
@ -59,9 +59,8 @@ int main(int argc, char* argv[]) {
  for (int i = 0; i < FLAGS_num_input_channels; ++i) {
    array_geometry.push_back(webrtc::Point(i * FLAGS_mic_spacing, 0.f, 0.f));
  }
-  webrtc::Beamformer bf(kChunkTimeMilliseconds,
-                        FLAGS_sample_rate,
-                        array_geometry);
+  webrtc::Beamformer bf(array_geometry);
+  bf.Initialize(kChunkTimeMilliseconds, FLAGS_sample_rate);
  while (true) {
    size_t samples_read = webrtc::PcmReadToFloat(read_file,
                                                 kInputSamplesPerChunk,
--- a/webrtc/modules/audio_processing/beamformer/mock_beamformer.cc
+++ b/webrtc/modules/audio_processing/beamformer/mock_beamformer.cc
@ -0,0 +1,22 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/beamformer/mock_beamformer.h"
+
+#include <vector>
+
+namespace webrtc {
+
+MockBeamformer::MockBeamformer(const std::vector<Point>& array_geometry)
+    : Beamformer(array_geometry) {}
+
+MockBeamformer::~MockBeamformer() {}
+
+}  // namespace webrtc
--- a/webrtc/modules/audio_processing/beamformer/mock_beamformer.h
+++ b/webrtc/modules/audio_processing/beamformer/mock_beamformer.h
@ -0,0 +1,38 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MOCK_BEAMFORMER_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MOCK_BEAMFORMER_H_
+
+#include <vector>
+
+#include "testing/gmock/include/gmock/gmock.h"
+#include "webrtc/modules/audio_processing/beamformer/beamformer.h"
+
+namespace webrtc {
+
+class MockBeamformer : public Beamformer {
+ public:
+  explicit MockBeamformer(const std::vector<Point>& array_geometry);
+  ~MockBeamformer() override;
+
+  MOCK_METHOD2(Initialize, void(int chunk_size_ms, int sample_rate_hz));
+  MOCK_METHOD6(ProcessChunk, void(const float* const* input,
+                                  const float* const* high_pass_split_input,
+                                  int num_input_channels,
+                                  int num_frames_per_band,
+                                  float* const* output,
+                                  float* const* high_pass_split_output));
+  MOCK_METHOD0(is_target_present, bool());
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MOCK_BEAMFORMER_H_
--- a/webrtc/modules/audio_processing/include/audio_processing.h
+++ b/webrtc/modules/audio_processing/include/audio_processing.h
@ -24,6 +24,7 @@ struct AecCore;
 namespace webrtc {

 class AudioFrame;
+class Beamformer;
 class EchoCancellation;
 class EchoControlMobile;
 class GainControl;
@ -199,6 +200,8 @@ class AudioProcessing {
  static AudioProcessing* Create();
  // Allows passing in an optional configuration at create-time.
  static AudioProcessing* Create(const Config& config);
+  // Only for testing.
+  static AudioProcessing* Create(const Config& config, Beamformer* beamformer);
  virtual ~AudioProcessing() {}

  // Initializes internal states, while retaining all user settings. This
--- a/webrtc/modules/audio_processing/test/audio_processing_unittest.cc
+++ b/webrtc/modules/audio_processing/test/audio_processing_unittest.cc
@ -18,6 +18,7 @@
 #include "webrtc/common_audio/resampler/include/push_resampler.h"
 #include "webrtc/common_audio/resampler/push_sinc_resampler.h"
 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_processing/beamformer/mock_beamformer.h"
 #include "webrtc/modules/audio_processing/common.h"
 #include "webrtc/modules/audio_processing/include/audio_processing.h"
 #include "webrtc/modules/audio_processing/test/test_utils.h"
@ -278,6 +279,35 @@ void OpenFileAndReadMessage(const std::string filename,
  fclose(file);
 }

+// Reads a 10 ms chunk of int16 interleaved audio from the given (assumed
+// stereo) file, converts to deinterleaved float (optionally downmixing) and
+// returns the result in |cb|. Returns false if the file ended (or on error) and
+// true otherwise.
+//
+// |int_data| and |float_data| are just temporary space that must be
+// sufficiently large to hold the 10 ms chunk.
+bool ReadChunk(FILE* file, int16_t* int_data, float* float_data,
+               ChannelBuffer<float>* cb) {
+  // The files always contain stereo audio.
+  size_t frame_size = cb->samples_per_channel() * 2;
+  size_t read_count = fread(int_data, sizeof(int16_t), frame_size, file);
+  if (read_count != frame_size) {
+    // Check that the file really ended.
+    assert(feof(file));
+    return false;  // This is expected.
+  }
+
+  S16ToFloat(int_data, frame_size, float_data);
+  if (cb->num_channels() == 1) {
+    MixStereoToMono(float_data, cb->data(), cb->samples_per_channel());
+  } else {
+    Deinterleave(float_data, cb->samples_per_channel(), 2,
+                 cb->channels());
+  }
+
+  return true;
+}
+
 class ApmTest : public ::testing::Test {
 protected:
  ApmTest();
@ -1164,6 +1194,87 @@ TEST_F(ApmTest, ManualVolumeChangeIsPossible) {
  }
 }

+#if !defined(WEBRTC_ANDROID) && !defined(WEBRTC_IOS)
+TEST_F(ApmTest, AgcOnlyAdaptsWhenTargetSignalIsPresent) {
+  const int kSampleRateHz = 16000;
+  const int kSamplesPerChannel =
+      AudioProcessing::kChunkSizeMs * kSampleRateHz / 1000;
+  const int kNumInputChannels = 2;
+  const int kNumOutputChannels = 1;
+  const int kNumChunks = 700;
+  const float kScaleFactor = 0.25f;
+  Config config;
+  std::vector<webrtc::Point> geometry;
+  geometry.push_back(webrtc::Point(0.f, 0.f, 0.f));
+  geometry.push_back(webrtc::Point(0.05f, 0.f, 0.f));
+  config.Set<Beamforming>(new Beamforming(true, geometry));
+  testing::NiceMock<MockBeamformer>* beamformer =
+      new testing::NiceMock<MockBeamformer>(geometry);
+  scoped_ptr<AudioProcessing> apm(AudioProcessing::Create(config, beamformer));
+  EXPECT_EQ(kNoErr, apm->gain_control()->Enable(true));
+  ChannelBuffer<float> src_buf(kSamplesPerChannel, kNumInputChannels);
+  ChannelBuffer<float> dest_buf(kSamplesPerChannel, kNumOutputChannels);
+  const int max_length = kSamplesPerChannel * std::max(kNumInputChannels,
+                                                       kNumOutputChannels);
+  scoped_ptr<int16_t[]> int_data(new int16_t[max_length]);
+  scoped_ptr<float[]> float_data(new float[max_length]);
+  std::string filename = ResourceFilePath("far", kSampleRateHz);
+  FILE* far_file = fopen(filename.c_str(), "rb");
+  ASSERT_TRUE(far_file != NULL) << "Could not open file " << filename << "\n";
+  const int kDefaultVolume = apm->gain_control()->stream_analog_level();
+  const int kDefaultCompressionGain =
+      apm->gain_control()->compression_gain_db();
+  bool is_target = false;
+  EXPECT_CALL(*beamformer, is_target_present())
+      .WillRepeatedly(testing::ReturnPointee(&is_target));
+  for (int i = 0; i < kNumChunks; ++i) {
+    ASSERT_TRUE(ReadChunk(far_file,
+                          int_data.get(),
+                          float_data.get(),
+                          &src_buf));
+    for (int j = 0; j < kNumInputChannels * kSamplesPerChannel; ++j) {
+      src_buf.data()[j] *= kScaleFactor;
+    }
+    EXPECT_EQ(kNoErr,
+              apm->ProcessStream(src_buf.channels(),
+                                 src_buf.samples_per_channel(),
+                                 kSampleRateHz,
+                                 LayoutFromChannels(src_buf.num_channels()),
+                                 kSampleRateHz,
+                                 LayoutFromChannels(dest_buf.num_channels()),
+                                 dest_buf.channels()));
+  }
+  EXPECT_EQ(kDefaultVolume,
+            apm->gain_control()->stream_analog_level());
+  EXPECT_EQ(kDefaultCompressionGain,
+            apm->gain_control()->compression_gain_db());
+  rewind(far_file);
+  is_target = true;
+  for (int i = 0; i < kNumChunks; ++i) {
+    ASSERT_TRUE(ReadChunk(far_file,
+                          int_data.get(),
+                          float_data.get(),
+                          &src_buf));
+    for (int j = 0; j < kNumInputChannels * kSamplesPerChannel; ++j) {
+      src_buf.data()[j] *= kScaleFactor;
+    }
+    EXPECT_EQ(kNoErr,
+              apm->ProcessStream(src_buf.channels(),
+                                 src_buf.samples_per_channel(),
+                                 kSampleRateHz,
+                                 LayoutFromChannels(src_buf.num_channels()),
+                                 kSampleRateHz,
+                                 LayoutFromChannels(dest_buf.num_channels()),
+                                 dest_buf.channels()));
+  }
+  EXPECT_LT(kDefaultVolume,
+            apm->gain_control()->stream_analog_level());
+  EXPECT_LT(kDefaultCompressionGain,
+            apm->gain_control()->compression_gain_db());
+  ASSERT_EQ(0, fclose(far_file));
+}
+#endif
+
 TEST_F(ApmTest, NoiseSuppression) {
  // Test valid suppression levels.
  NoiseSuppression::Level level[] = {
@ -2031,35 +2142,6 @@ TEST_F(ApmTest, NoErrorsWithKeyboardChannel) {
  }
 }

-// Reads a 10 ms chunk of int16 interleaved audio from the given (assumed
-// stereo) file, converts to deinterleaved float (optionally downmixing) and
-// returns the result in |cb|. Returns false if the file ended (or on error) and
-// true otherwise.
-//
-// |int_data| and |float_data| are just temporary space that must be
-// sufficiently large to hold the 10 ms chunk.
-bool ReadChunk(FILE* file, int16_t* int_data, float* float_data,
-               ChannelBuffer<float>* cb) {
-  // The files always contain stereo audio.
-  size_t frame_size = cb->samples_per_channel() * 2;
-  size_t read_count = fread(int_data, sizeof(int16_t), frame_size, file);
-  if (read_count != frame_size) {
-    // Check that the file really ended.
-    assert(feof(file));
-    return false;  // This is expected.
-  }
-
-  S16ToFloat(int_data, frame_size, float_data);
-  if (cb->num_channels() == 1) {
-    MixStereoToMono(float_data, cb->data(), cb->samples_per_channel());
-  } else {
-    Deinterleave(float_data, cb->samples_per_channel(), 2,
-                 cb->channels());
-  }
-
-  return true;
-}
-
 // Compares the reference and test arrays over a region around the expected
 // delay. Finds the highest SNR in that region and adds the variance and squared
 // error results to the supplied accumulators.
--- a/webrtc/modules/modules.gyp
+++ b/webrtc/modules/modules.gyp
@ -180,6 +180,8 @@
            'audio_processing/beamformer/complex_matrix_unittest.cc',
            'audio_processing/beamformer/covariance_matrix_generator_unittest.cc',
            'audio_processing/beamformer/matrix_unittest.cc',
+            'audio_processing/beamformer/mock_beamformer.cc',
+            'audio_processing/beamformer/mock_beamformer.h',
            'audio_processing/beamformer/pcm_utils.cc',
            'audio_processing/beamformer/pcm_utils.h',
            'audio_processing/echo_cancellation_impl_unittest.cc',