From fb7a039e9d035cc2d0a9591022baeab9475074c2 Mon Sep 17 00:00:00 2001 From: "aluebs@webrtc.org" Date: Mon, 5 Jan 2015 21:58:58 +0000 Subject: [PATCH] Use array geometry in Beamformer R=andrew@webrtc.org Review URL: https://webrtc-codereview.appspot.com/35559004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@8000 4adac7df-926f-26a2-2b94-8c16560cd09d --- .../audio_processing/audio_processing_impl.cc | 10 ++++----- .../audio_processing/audio_processing_impl.h | 1 + .../audio_processing/beamformer/beamformer.cc | 21 +++++++++++++++---- .../audio_processing/beamformer/beamformer.h | 9 +++++--- .../beamformer/beamformer_test.cc | 8 +++++-- .../include/audio_processing.h | 18 ++++++++++++++-- 6 files changed, 51 insertions(+), 16 deletions(-) diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc index 086380e40..73b3f2709 100644 --- a/webrtc/modules/audio_processing/audio_processing_impl.cc +++ b/webrtc/modules/audio_processing/audio_processing_impl.cc @@ -185,7 +185,8 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config) use_new_agc_(config.Get<ExperimentalAgc>().enabled), #endif transient_suppressor_enabled_(config.Get<ExperimentalNs>().enabled), - beamformer_enabled_(config.Get<Beamforming>().enabled) { + beamformer_enabled_(config.Get<Beamforming>().enabled), + array_geometry_(config.Get<Beamforming>().array_geometry) { echo_cancellation_ = new EchoCancellationImpl(this, crit_); component_list_.push_back(echo_cancellation_); @@ -400,7 +401,8 @@ int AudioProcessingImpl::MaybeInitializeLocked(int input_sample_rate_hz, return kNoError; } if (beamformer_enabled_ && - (num_input_channels < 2 || num_output_channels > 1)) { + (static_cast<size_t>(num_input_channels) != array_geometry_.size() || + num_output_channels > 1)) { return kBadNumberChannelsError; } return InitializeLocked(input_sample_rate_hz, @@ -995,11 +997,9 @@ int AudioProcessingImpl::InitializeTransient() { void AudioProcessingImpl::InitializeBeamformer() { if (beamformer_enabled_) { #ifdef 
WEBRTC_BEAMFORMER - // TODO(aluebs): Don't use a hard-coded microphone spacing. beamformer_.reset(new Beamformer(kChunkSizeMs, split_rate_, - fwd_in_format_.num_channels(), - 0.05f)); + array_geometry_)); #else assert(false); #endif diff --git a/webrtc/modules/audio_processing/audio_processing_impl.h b/webrtc/modules/audio_processing/audio_processing_impl.h index 152d64cd9..08de122b9 100644 --- a/webrtc/modules/audio_processing/audio_processing_impl.h +++ b/webrtc/modules/audio_processing/audio_processing_impl.h @@ -219,6 +219,7 @@ class AudioProcessingImpl : public AudioProcessing { scoped_ptr<TransientSuppressor> transient_suppressor_; const bool beamformer_enabled_; scoped_ptr<Beamformer> beamformer_; + const std::vector<Point> array_geometry_; }; } // namespace webrtc diff --git a/webrtc/modules/audio_processing/beamformer/beamformer.cc b/webrtc/modules/audio_processing/beamformer/beamformer.cc index 4dfd3c2ec..f41462eef 100644 --- a/webrtc/modules/audio_processing/beamformer/beamformer.cc +++ b/webrtc/modules/audio_processing/beamformer/beamformer.cc @@ -128,13 +128,12 @@ int Round(float x) { Beamformer::Beamformer(int chunk_size_ms, int sample_rate_hz, - int num_input_channels, - float mic_spacing) + const std::vector<Point>& array_geometry) : chunk_length_(sample_rate_hz / (1000.f / chunk_size_ms)), window_(new float[kFftSize]), - num_input_channels_(num_input_channels), + num_input_channels_(array_geometry.size()), sample_rate_hz_(sample_rate_hz), - mic_spacing_(mic_spacing), + mic_spacing_(MicSpacingFromGeometry(array_geometry)), decay_threshold_( pow(2, (kFftSize / -2.f) / (sample_rate_hz_ * kHalfLifeSeconds))), mid_frequency_lower_bin_bound_( @@ -477,4 +476,18 @@ void Beamformer::CalculateHighFrequencyMask() { high_pass_postfilter_mask_ += high_pass_mask; } +// This method CHECKs for a uniform linear array. 
+float Beamformer::MicSpacingFromGeometry(const std::vector<Point>& geometry) { + CHECK_GE(geometry.size(), 2u); + float mic_spacing = 0.f; + for (size_t i = 0u; i < 3u; ++i) { + float difference = geometry[1].c[i] - geometry[0].c[i]; + for (size_t j = 2u; j < geometry.size(); ++j) { + CHECK_LT(geometry[j].c[i] - geometry[j - 1].c[i] - difference, 1e-6); + } + mic_spacing += difference * difference; + } + return sqrt(mic_spacing); +} + } // namespace webrtc diff --git a/webrtc/modules/audio_processing/beamformer/beamformer.h b/webrtc/modules/audio_processing/beamformer/beamformer.h index 31b372198..d50b684a3 100644 --- a/webrtc/modules/audio_processing/beamformer/beamformer.h +++ b/webrtc/modules/audio_processing/beamformer/beamformer.h @@ -13,6 +13,7 @@ #include "webrtc/common_audio/lapped_transform.h" #include "webrtc/modules/audio_processing/beamformer/complex_matrix.h" +#include "webrtc/modules/audio_processing/include/audio_processing.h" namespace webrtc { @@ -26,12 +27,12 @@ namespace webrtc { // TODO: Target angle assumed to be 0. Parameterize target angle. class Beamformer : public LappedTransform::Callback { public: + // At the moment it only accepts uniform linear microphone arrays. Using the + // first microphone as a reference position [0, 0, 0] is a natural choice. Beamformer(int chunk_size_ms, // Sample rate corresponds to the lower band. int sample_rate_hz, - int num_input_channels, - // Microphone spacing in meters. - float mic_spacing); + const std::vector<Point>& array_geometry); // Process one time-domain chunk of audio. The audio can be separated into // two signals by frequency, with the higher half passed in as the second @@ -91,6 +92,8 @@ class Beamformer : public LappedTransform::Callback { // Applies both sets of masks to |input| and store in |output|. void ApplyMasks(const complex_f* const* input, complex_f* const* output); + float MicSpacingFromGeometry(const std::vector<Point>& array_geometry); + // Deals with the fft transform and blocking. 
const int chunk_length_; scoped_ptr<LappedTransform> lapped_transform_; diff --git a/webrtc/modules/audio_processing/beamformer/beamformer_test.cc b/webrtc/modules/audio_processing/beamformer/beamformer_test.cc index e202506e6..e20c3a9a2 100644 --- a/webrtc/modules/audio_processing/beamformer/beamformer_test.cc +++ b/webrtc/modules/audio_processing/beamformer/beamformer_test.cc @@ -54,10 +54,14 @@ int main(int argc, char* argv[]) { fseek(read_file, 44, SEEK_SET); FILE* write_file = fopen(FLAGS_output_file_path.c_str(), "wb"); + + std::vector<webrtc::Point> array_geometry; + for (int i = 0; i < FLAGS_num_input_channels; ++i) { + array_geometry.push_back(webrtc::Point(i * FLAGS_mic_spacing, 0.f, 0.f)); + } webrtc::Beamformer bf(kChunkTimeMilliseconds, FLAGS_sample_rate, - FLAGS_num_input_channels, - FLAGS_mic_spacing); + array_geometry); while (true) { size_t samples_read = webrtc::PcmReadToFloat(read_file, kInputSamplesPerChunk, diff --git a/webrtc/modules/audio_processing/include/audio_processing.h b/webrtc/modules/audio_processing/include/audio_processing.h index 79340aeee..975445052 100644 --- a/webrtc/modules/audio_processing/include/audio_processing.h +++ b/webrtc/modules/audio_processing/include/audio_processing.h @@ -13,6 +13,7 @@ #include <stddef.h> // size_t #include <stdio.h> // FILE +#include <vector> #include "webrtc/base/platform_file.h" #include "webrtc/common.h" @@ -82,12 +83,25 @@ struct ExperimentalNs { bool enabled; }; +// Coordinates in meters. +struct Point { + Point(float x, float y, float z) { + c[0] = x; + c[1] = y; + c[2] = z; + } + float c[3]; +}; + // Use to enable beamforming. Must be provided through the constructor. It will // have no impact if used with AudioProcessing::SetExtraOptions(). 
struct Beamforming { Beamforming() : enabled(false) {} - explicit Beamforming(bool enabled) : enabled(enabled) {} - bool enabled; + Beamforming(bool enabled, const std::vector<Point>& array_geometry) + : enabled(enabled), + array_geometry(array_geometry) {} + const bool enabled; + const std::vector<Point> array_geometry; }; static const int kAudioProcMaxNativeSampleRateHz = 32000;