From fb7a039e9d035cc2d0a9591022baeab9475074c2 Mon Sep 17 00:00:00 2001 From: "aluebs@webrtc.org" Date: Mon, 5 Jan 2015 21:58:58 +0000 Subject: [PATCH] Use array geometry in Beamformer R=andrew@webrtc.org Review URL: https://webrtc-codereview.appspot.com/35559004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@8000 4adac7df-926f-26a2-2b94-8c16560cd09d --- .../audio_processing/audio_processing_impl.cc | 10 ++++----- .../audio_processing/audio_processing_impl.h | 1 + .../audio_processing/beamformer/beamformer.cc | 21 +++++++++++++++---- .../audio_processing/beamformer/beamformer.h | 9 +++++--- .../beamformer/beamformer_test.cc | 8 +++++-- .../include/audio_processing.h | 18 ++++++++++++++-- 6 files changed, 51 insertions(+), 16 deletions(-) diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc index 086380e40..73b3f2709 100644 --- a/webrtc/modules/audio_processing/audio_processing_impl.cc +++ b/webrtc/modules/audio_processing/audio_processing_impl.cc @@ -185,7 +185,8 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config) use_new_agc_(config.Get<ExperimentalAgc>().enabled), #endif transient_suppressor_enabled_(config.Get<ExperimentalNs>().enabled), - beamformer_enabled_(config.Get<Beamforming>().enabled) { + beamformer_enabled_(config.Get<Beamforming>().enabled), + array_geometry_(config.Get<Beamforming>().array_geometry) { echo_cancellation_ = new EchoCancellationImpl(this, crit_); component_list_.push_back(echo_cancellation_); @@ -400,7 +401,8 @@ int AudioProcessingImpl::MaybeInitializeLocked(int input_sample_rate_hz, return kNoError; } if (beamformer_enabled_ && - (num_input_channels < 2 || num_output_channels > 1)) { + (static_cast<size_t>(num_input_channels) != array_geometry_.size() || + num_output_channels > 1)) { return kBadNumberChannelsError; } return InitializeLocked(input_sample_rate_hz, @@ -995,11 +997,9 @@ int AudioProcessingImpl::InitializeTransient() { void AudioProcessingImpl::InitializeBeamformer() { if (beamformer_enabled_) { #ifdef 
WEBRTC_BEAMFORMER - // TODO(aluebs): Don't use a hard-coded microphone spacing. beamformer_.reset(new Beamformer(kChunkSizeMs, split_rate_, - fwd_in_format_.num_channels(), - 0.05f)); + array_geometry_)); #else assert(false); #endif diff --git a/webrtc/modules/audio_processing/audio_processing_impl.h b/webrtc/modules/audio_processing/audio_processing_impl.h index 152d64cd9..08de122b9 100644 --- a/webrtc/modules/audio_processing/audio_processing_impl.h +++ b/webrtc/modules/audio_processing/audio_processing_impl.h @@ -219,6 +219,7 @@ class AudioProcessingImpl : public AudioProcessing { scoped_ptr<TransientSuppressor> transient_suppressor_; const bool beamformer_enabled_; scoped_ptr<Beamformer> beamformer_; + const std::vector<Point> array_geometry_; }; } // namespace webrtc diff --git a/webrtc/modules/audio_processing/beamformer/beamformer.cc b/webrtc/modules/audio_processing/beamformer/beamformer.cc index 4dfd3c2ec..f41462eef 100644 --- a/webrtc/modules/audio_processing/beamformer/beamformer.cc +++ b/webrtc/modules/audio_processing/beamformer/beamformer.cc @@ -128,13 +128,12 @@ int Round(float x) { Beamformer::Beamformer(int chunk_size_ms, int sample_rate_hz, - int num_input_channels, - float mic_spacing) + const std::vector<Point>& array_geometry) : chunk_length_(sample_rate_hz / (1000.f / chunk_size_ms)), window_(new float[kFftSize]), - num_input_channels_(num_input_channels), + num_input_channels_(array_geometry.size()), sample_rate_hz_(sample_rate_hz), - mic_spacing_(mic_spacing), + mic_spacing_(MicSpacingFromGeometry(array_geometry)), decay_threshold_( pow(2, (kFftSize / -2.f) / (sample_rate_hz_ * kHalfLifeSeconds))), mid_frequency_lower_bin_bound_( @@ -477,4 +476,18 @@ void Beamformer::CalculateHighFrequencyMask() { high_pass_postfilter_mask_ += high_pass_mask; } +// This method CHECKs for a uniform linear array. 
+float Beamformer::MicSpacingFromGeometry(const std::vector<Point>& geometry) { + CHECK_GE(geometry.size(), 2u); + float mic_spacing = 0.f; + for (size_t i = 0u; i < 3u; ++i) { + float difference = geometry[1].c[i] - geometry[0].c[i]; + for (size_t j = 2u; j < geometry.size(); ++j) { + CHECK_LT(geometry[j].c[i] - geometry[j - 1].c[i] - difference, 1e-6); + } + mic_spacing += difference * difference; + } + return sqrt(mic_spacing); +} + } // namespace webrtc diff --git a/webrtc/modules/audio_processing/beamformer/beamformer.h b/webrtc/modules/audio_processing/beamformer/beamformer.h index 31b372198..d50b684a3 100644 --- a/webrtc/modules/audio_processing/beamformer/beamformer.h +++ b/webrtc/modules/audio_processing/beamformer/beamformer.h @@ -13,6 +13,7 @@ #include "webrtc/common_audio/lapped_transform.h" #include "webrtc/modules/audio_processing/beamformer/complex_matrix.h" +#include "webrtc/modules/audio_processing/include/audio_processing.h" namespace webrtc { @@ -26,12 +27,12 @@ namespace webrtc { // TODO: Target angle assumed to be 0. Parameterize target angle. class Beamformer : public LappedTransform::Callback { public: + // At the moment it only accepts uniform linear microphone arrays. Using the + // first microphone as a reference position [0, 0, 0] is a natural choice. Beamformer(int chunk_size_ms, // Sample rate corresponds to the lower band. int sample_rate_hz, - int num_input_channels, - // Microphone spacing in meters. - float mic_spacing); + const std::vector<Point>& array_geometry); // Process one time-domain chunk of audio. The audio can be separated into // two signals by frequency, with the higher half passed in as the second @@ -91,6 +92,8 @@ class Beamformer : public LappedTransform::Callback { // Applies both sets of masks to |input| and store in |output|. void ApplyMasks(const complex_f* const* input, complex_f* const* output); + float MicSpacingFromGeometry(const std::vector<Point>& array_geometry); + // Deals with the fft transform and blocking. 
const int chunk_length_; scoped_ptr<LappedTransform> lapped_transform_; diff --git a/webrtc/modules/audio_processing/beamformer/beamformer_test.cc b/webrtc/modules/audio_processing/beamformer/beamformer_test.cc index e202506e6..e20c3a9a2 100644 --- a/webrtc/modules/audio_processing/beamformer/beamformer_test.cc +++ b/webrtc/modules/audio_processing/beamformer/beamformer_test.cc @@ -54,10 +54,14 @@ int main(int argc, char* argv[]) { fseek(read_file, 44, SEEK_SET); FILE* write_file = fopen(FLAGS_output_file_path.c_str(), "wb"); + + std::vector<webrtc::Point> array_geometry; + for (int i = 0; i < FLAGS_num_input_channels; ++i) { + array_geometry.push_back(webrtc::Point(i * FLAGS_mic_spacing, 0.f, 0.f)); + } webrtc::Beamformer bf(kChunkTimeMilliseconds, FLAGS_sample_rate, - FLAGS_num_input_channels, - FLAGS_mic_spacing); + array_geometry); while (true) { size_t samples_read = webrtc::PcmReadToFloat(read_file, kInputSamplesPerChunk, diff --git a/webrtc/modules/audio_processing/include/audio_processing.h b/webrtc/modules/audio_processing/include/audio_processing.h index 79340aeee..975445052 100644 --- a/webrtc/modules/audio_processing/include/audio_processing.h +++ b/webrtc/modules/audio_processing/include/audio_processing.h @@ -13,6 +13,7 @@ #include <stddef.h> // size_t #include <stdio.h> // FILE +#include <vector> #include "webrtc/base/platform_file.h" #include "webrtc/common.h" @@ -82,12 +83,25 @@ struct ExperimentalNs { bool enabled; }; +// Coordinates in meters. +struct Point { + Point(float x, float y, float z) { + c[0] = x; + c[1] = y; + c[2] = z; + } + float c[3]; +}; + // Use to enable beamforming. Must be provided through the constructor. It will // have no impact if used with AudioProcessing::SetExtraOptions(). 
struct Beamforming { Beamforming() : enabled(false) {} - explicit Beamforming(bool enabled) : enabled(enabled) {} - bool enabled; + Beamforming(bool enabled, const std::vector<Point>& array_geometry) + : enabled(enabled), + array_geometry(array_geometry) {} + const bool enabled; + const std::vector<Point> array_geometry; }; static const int kAudioProcMaxNativeSampleRateHz = 32000;