Add high frequency correction to Beamformer
R=andrew@webrtc.org Review URL: https://webrtc-codereview.appspot.com/35989004 Cr-Commit-Position: refs/heads/master@{#8278} git-svn-id: http://webrtc.googlecode.com/svn/trunk@8278 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
		| @@ -62,11 +62,11 @@ const float kCovUniformGapHalfWidth = 0.001f; | |||||||
| const float kHalfLifeSeconds = 0.05f; | const float kHalfLifeSeconds = 0.05f; | ||||||
|  |  | ||||||
| // The average mask is computed from masks in this mid-frequency range. | // The average mask is computed from masks in this mid-frequency range. | ||||||
| const int kMidFrequnecyLowerBoundHz = 250; | const int kLowAverageStartHz = 200; | ||||||
| const int kMidFrequencyUpperBoundHz = 400; | const int kLowAverageEndHz = 400; | ||||||
|  |  | ||||||
| const int kHighFrequencyLowerBoundHz = 4000; | const int kHighAverageStartHz = 6000; | ||||||
| const int kHighFrequencyUpperBoundHz = 7000; | const int kHighAverageEndHz = 6500; | ||||||
|  |  | ||||||
| // Mask threshold over which the data is considered signal and not interference. | // Mask threshold over which the data is considered signal and not interference. | ||||||
| const float kMaskTargetThreshold = 0.3f; | const float kMaskTargetThreshold = 0.3f; | ||||||
| @@ -141,24 +141,24 @@ void Beamformer::Initialize(int chunk_size_ms, int sample_rate_hz) { | |||||||
|   sample_rate_hz_ = sample_rate_hz; |   sample_rate_hz_ = sample_rate_hz; | ||||||
|   decay_threshold_ = |   decay_threshold_ = | ||||||
|       pow(2, (kFftSize / -2.f) / (sample_rate_hz_ * kHalfLifeSeconds)); |       pow(2, (kFftSize / -2.f) / (sample_rate_hz_ * kHalfLifeSeconds)); | ||||||
|   mid_frequency_lower_bin_bound_ = |   low_average_start_bin_ = | ||||||
|       Round(kMidFrequnecyLowerBoundHz * kFftSize / sample_rate_hz_); |       Round(kLowAverageStartHz * kFftSize / sample_rate_hz_); | ||||||
|   mid_frequency_upper_bin_bound_ = |   low_average_end_bin_ = | ||||||
|       Round(kMidFrequencyUpperBoundHz * kFftSize / sample_rate_hz_); |       Round(kLowAverageEndHz * kFftSize / sample_rate_hz_); | ||||||
|   high_frequency_lower_bin_bound_ = |   high_average_start_bin_ = | ||||||
|       Round(kHighFrequencyLowerBoundHz * kFftSize / sample_rate_hz_); |       Round(kHighAverageStartHz * kFftSize / sample_rate_hz_); | ||||||
|   high_frequency_upper_bin_bound_ = |   high_average_end_bin_ = | ||||||
|       Round(kHighFrequencyUpperBoundHz * kFftSize / sample_rate_hz_); |       Round(kHighAverageEndHz * kFftSize / sample_rate_hz_); | ||||||
|   current_block_ix_ = 0; |   current_block_ix_ = 0; | ||||||
|   previous_block_ix_ = -1; |   previous_block_ix_ = -1; | ||||||
|   is_target_present_ = false; |   is_target_present_ = false; | ||||||
|   hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize; |   hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize; | ||||||
|   interference_blocks_count_ = hold_target_blocks_; |   interference_blocks_count_ = hold_target_blocks_; | ||||||
|  |  | ||||||
|   DCHECK_LE(mid_frequency_upper_bin_bound_, kNumFreqBins); |   DCHECK_LE(low_average_end_bin_, kNumFreqBins); | ||||||
|   DCHECK_LT(mid_frequency_lower_bin_bound_, mid_frequency_upper_bin_bound_); |   DCHECK_LT(low_average_start_bin_, low_average_end_bin_); | ||||||
|   DCHECK_LE(high_frequency_upper_bin_bound_, kNumFreqBins); |   DCHECK_LE(high_average_end_bin_, kNumFreqBins); | ||||||
|   DCHECK_LT(high_frequency_lower_bin_bound_, high_frequency_upper_bin_bound_); |   DCHECK_LT(high_average_start_bin_, high_average_end_bin_); | ||||||
|  |  | ||||||
|   lapped_transform_.reset(new LappedTransform(num_input_channels_, |   lapped_transform_.reset(new LappedTransform(num_input_channels_, | ||||||
|                                               1, |                                               1, | ||||||
| @@ -329,7 +329,7 @@ void Beamformer::ProcessAudioBlock(const complex_f* const* input, | |||||||
|   // Calculating the post-filter masks. Note that we need two for each |   // Calculating the post-filter masks. Note that we need two for each | ||||||
|   // frequency bin to account for the positive and negative interferer |   // frequency bin to account for the positive and negative interferer | ||||||
|   // angle. |   // angle. | ||||||
|   for (int i = 0; i < kNumFreqBins; ++i) { |   for (int i = low_average_start_bin_; i < high_average_end_bin_; ++i) { | ||||||
|     eig_m_.CopyFromColumn(input, i, num_input_channels_); |     eig_m_.CopyFromColumn(input, i, num_input_channels_); | ||||||
|     float eig_m_norm_factor = |     float eig_m_norm_factor = | ||||||
|         std::sqrt(ConjugateDotProduct(eig_m_, eig_m_)).real(); |         std::sqrt(ConjugateDotProduct(eig_m_, eig_m_)).real(); | ||||||
| @@ -368,10 +368,7 @@ void Beamformer::ProcessAudioBlock(const complex_f* const* input, | |||||||
|   } |   } | ||||||
|  |  | ||||||
|   ApplyLowFrequencyCorrection(); |   ApplyLowFrequencyCorrection(); | ||||||
|  |   ApplyHighFrequencyCorrection(); | ||||||
|   if (high_pass_exists_) { |  | ||||||
|     CalculateHighFrequencyMask(); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   ApplyMasks(input, output); |   ApplyMasks(input, output); | ||||||
|  |  | ||||||
| @@ -430,31 +427,29 @@ void Beamformer::ApplyDecay() { | |||||||
| void Beamformer::ApplyLowFrequencyCorrection() { | void Beamformer::ApplyLowFrequencyCorrection() { | ||||||
|   float low_frequency_mask = 0.f; |   float low_frequency_mask = 0.f; | ||||||
|   float* mask_els = postfilter_masks_[current_block_ix_].elements()[0]; |   float* mask_els = postfilter_masks_[current_block_ix_].elements()[0]; | ||||||
|   for (int i = mid_frequency_lower_bin_bound_; |   for (int i = low_average_start_bin_; i < low_average_end_bin_; ++i) { | ||||||
|        i <= mid_frequency_upper_bin_bound_; |  | ||||||
|        ++i) { |  | ||||||
|     low_frequency_mask += mask_els[i]; |     low_frequency_mask += mask_els[i]; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   low_frequency_mask /= |   low_frequency_mask /= low_average_end_bin_ - low_average_start_bin_; | ||||||
|       mid_frequency_upper_bin_bound_ - mid_frequency_lower_bin_bound_ + 1; |  | ||||||
|  |  | ||||||
|   for (int i = 0; i < mid_frequency_lower_bin_bound_; ++i) { |   for (int i = 0; i < low_average_start_bin_; ++i) { | ||||||
|     mask_els[i] = low_frequency_mask; |     mask_els[i] = low_frequency_mask; | ||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
| void Beamformer::CalculateHighFrequencyMask() { | void Beamformer::ApplyHighFrequencyCorrection() { | ||||||
|   float high_pass_mask = 0.f; |   float high_pass_mask = 0.f; | ||||||
|   float* mask_els = postfilter_masks_[current_block_ix_].elements()[0]; |   float* mask_els = postfilter_masks_[current_block_ix_].elements()[0]; | ||||||
|   for (int i = high_frequency_lower_bin_bound_; |   for (int i = high_average_start_bin_; i < high_average_end_bin_; ++i) { | ||||||
|        i <= high_frequency_upper_bin_bound_; |  | ||||||
|        ++i) { |  | ||||||
|     high_pass_mask += mask_els[i]; |     high_pass_mask += mask_els[i]; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   high_pass_mask /= |   high_pass_mask /= high_average_end_bin_ - high_average_start_bin_; | ||||||
|       high_frequency_upper_bin_bound_ - high_frequency_lower_bin_bound_ + 1; |  | ||||||
|  |   for (int i = high_average_end_bin_; i < kNumFreqBins; ++i) { | ||||||
|  |     mask_els[i] = high_pass_mask; | ||||||
|  |   } | ||||||
|  |  | ||||||
|   high_pass_postfilter_mask_ += high_pass_mask; |   high_pass_postfilter_mask_ += high_pass_mask; | ||||||
| } | } | ||||||
|   | |||||||
| @@ -94,7 +94,7 @@ class Beamformer : public LappedTransform::Callback { | |||||||
|   // in the time-domain. Further, we average these block-masks over a chunk, |   // in the time-domain. Further, we average these block-masks over a chunk, | ||||||
|   // resulting in one postfilter mask per audio chunk. This allows us to skip |   // resulting in one postfilter mask per audio chunk. This allows us to skip | ||||||
|   // both transforming and blocking the high-frequency signal. |   // both transforming and blocking the high-frequency signal. | ||||||
|   void CalculateHighFrequencyMask(); |   void ApplyHighFrequencyCorrection(); | ||||||
|  |  | ||||||
|   // Applies both sets of masks to |input| and store in |output|. |   // Applies both sets of masks to |input| and store in |output|. | ||||||
|   void ApplyMasks(const complex_f* const* input, complex_f* const* output); |   void ApplyMasks(const complex_f* const* input, complex_f* const* output); | ||||||
| @@ -120,10 +120,10 @@ class Beamformer : public LappedTransform::Callback { | |||||||
|  |  | ||||||
|   // Calculated based on user-input and constants in the .cc file. |   // Calculated based on user-input and constants in the .cc file. | ||||||
|   float decay_threshold_; |   float decay_threshold_; | ||||||
|   int mid_frequency_lower_bin_bound_; |   int low_average_start_bin_; | ||||||
|   int mid_frequency_upper_bin_bound_; |   int low_average_end_bin_; | ||||||
|   int high_frequency_lower_bin_bound_; |   int high_average_start_bin_; | ||||||
|   int high_frequency_upper_bin_bound_; |   int high_average_end_bin_; | ||||||
|  |  | ||||||
|   // Indices into |postfilter_masks_|. |   // Indices into |postfilter_masks_|. | ||||||
|   int current_block_ix_; |   int current_block_ix_; | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 aluebs@webrtc.org
					aluebs@webrtc.org