Add high frequency correction to Beamformer

R=andrew@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/35989004

Cr-Commit-Position: refs/heads/master@{#8278}
git-svn-id: http://webrtc.googlecode.com/svn/trunk@8278 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
aluebs@webrtc.org 2015-02-07 01:07:09 +00:00
parent 0c7ec770ff
commit 799e667e9f
2 changed files with 33 additions and 38 deletions

View File

@ -62,11 +62,11 @@ const float kCovUniformGapHalfWidth = 0.001f;
const float kHalfLifeSeconds = 0.05f;
// The average mask is computed from masks in this mid-frequency range.
const int kMidFrequnecyLowerBoundHz = 250;
const int kMidFrequencyUpperBoundHz = 400;
const int kLowAverageStartHz = 200;
const int kLowAverageEndHz = 400;
const int kHighFrequencyLowerBoundHz = 4000;
const int kHighFrequencyUpperBoundHz = 7000;
const int kHighAverageStartHz = 6000;
const int kHighAverageEndHz = 6500;
// Mask threshold over which the data is considered signal and not interference.
const float kMaskTargetThreshold = 0.3f;
@ -141,24 +141,24 @@ void Beamformer::Initialize(int chunk_size_ms, int sample_rate_hz) {
sample_rate_hz_ = sample_rate_hz;
decay_threshold_ =
pow(2, (kFftSize / -2.f) / (sample_rate_hz_ * kHalfLifeSeconds));
mid_frequency_lower_bin_bound_ =
Round(kMidFrequnecyLowerBoundHz * kFftSize / sample_rate_hz_);
mid_frequency_upper_bin_bound_ =
Round(kMidFrequencyUpperBoundHz * kFftSize / sample_rate_hz_);
high_frequency_lower_bin_bound_ =
Round(kHighFrequencyLowerBoundHz * kFftSize / sample_rate_hz_);
high_frequency_upper_bin_bound_ =
Round(kHighFrequencyUpperBoundHz * kFftSize / sample_rate_hz_);
low_average_start_bin_ =
Round(kLowAverageStartHz * kFftSize / sample_rate_hz_);
low_average_end_bin_ =
Round(kLowAverageEndHz * kFftSize / sample_rate_hz_);
high_average_start_bin_ =
Round(kHighAverageStartHz * kFftSize / sample_rate_hz_);
high_average_end_bin_ =
Round(kHighAverageEndHz * kFftSize / sample_rate_hz_);
current_block_ix_ = 0;
previous_block_ix_ = -1;
is_target_present_ = false;
hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize;
interference_blocks_count_ = hold_target_blocks_;
DCHECK_LE(mid_frequency_upper_bin_bound_, kNumFreqBins);
DCHECK_LT(mid_frequency_lower_bin_bound_, mid_frequency_upper_bin_bound_);
DCHECK_LE(high_frequency_upper_bin_bound_, kNumFreqBins);
DCHECK_LT(high_frequency_lower_bin_bound_, high_frequency_upper_bin_bound_);
DCHECK_LE(low_average_end_bin_, kNumFreqBins);
DCHECK_LT(low_average_start_bin_, low_average_end_bin_);
DCHECK_LE(high_average_end_bin_, kNumFreqBins);
DCHECK_LT(high_average_start_bin_, high_average_end_bin_);
lapped_transform_.reset(new LappedTransform(num_input_channels_,
1,
@ -329,7 +329,7 @@ void Beamformer::ProcessAudioBlock(const complex_f* const* input,
// Calculating the post-filter masks. Note that we need two for each
// frequency bin to account for the positive and negative interferer
// angle.
for (int i = 0; i < kNumFreqBins; ++i) {
for (int i = low_average_start_bin_; i < high_average_end_bin_; ++i) {
eig_m_.CopyFromColumn(input, i, num_input_channels_);
float eig_m_norm_factor =
std::sqrt(ConjugateDotProduct(eig_m_, eig_m_)).real();
@ -368,10 +368,7 @@ void Beamformer::ProcessAudioBlock(const complex_f* const* input,
}
ApplyLowFrequencyCorrection();
if (high_pass_exists_) {
CalculateHighFrequencyMask();
}
ApplyHighFrequencyCorrection();
ApplyMasks(input, output);
@ -430,31 +427,29 @@ void Beamformer::ApplyDecay() {
void Beamformer::ApplyLowFrequencyCorrection() {
float low_frequency_mask = 0.f;
float* mask_els = postfilter_masks_[current_block_ix_].elements()[0];
for (int i = mid_frequency_lower_bin_bound_;
i <= mid_frequency_upper_bin_bound_;
++i) {
for (int i = low_average_start_bin_; i < low_average_end_bin_; ++i) {
low_frequency_mask += mask_els[i];
}
low_frequency_mask /=
mid_frequency_upper_bin_bound_ - mid_frequency_lower_bin_bound_ + 1;
low_frequency_mask /= low_average_end_bin_ - low_average_start_bin_;
for (int i = 0; i < mid_frequency_lower_bin_bound_; ++i) {
for (int i = 0; i < low_average_start_bin_; ++i) {
mask_els[i] = low_frequency_mask;
}
}
void Beamformer::CalculateHighFrequencyMask() {
void Beamformer::ApplyHighFrequencyCorrection() {
float high_pass_mask = 0.f;
float* mask_els = postfilter_masks_[current_block_ix_].elements()[0];
for (int i = high_frequency_lower_bin_bound_;
i <= high_frequency_upper_bin_bound_;
++i) {
for (int i = high_average_start_bin_; i < high_average_end_bin_; ++i) {
high_pass_mask += mask_els[i];
}
high_pass_mask /=
high_frequency_upper_bin_bound_ - high_frequency_lower_bin_bound_ + 1;
high_pass_mask /= high_average_end_bin_ - high_average_start_bin_;
for (int i = high_average_end_bin_; i < kNumFreqBins; ++i) {
mask_els[i] = high_pass_mask;
}
high_pass_postfilter_mask_ += high_pass_mask;
}

View File

@ -94,7 +94,7 @@ class Beamformer : public LappedTransform::Callback {
// in the time-domain. Further, we average these block-masks over a chunk,
// resulting in one postfilter mask per audio chunk. This allows us to skip
// both transforming and blocking the high-frequency signal.
void CalculateHighFrequencyMask();
void ApplyHighFrequencyCorrection();
// Applies both sets of masks to |input| and store in |output|.
void ApplyMasks(const complex_f* const* input, complex_f* const* output);
@ -120,10 +120,10 @@ class Beamformer : public LappedTransform::Callback {
// Calculated based on user-input and constants in the .cc file.
float decay_threshold_;
int mid_frequency_lower_bin_bound_;
int mid_frequency_upper_bin_bound_;
int high_frequency_lower_bin_bound_;
int high_frequency_upper_bin_bound_;
int low_average_start_bin_;
int low_average_end_bin_;
int high_average_start_bin_;
int high_average_end_bin_;
// Indices into |postfilter_masks_|.
int current_block_ix_;