Only adapt AGC when the desired signal is present
Takes the 50% quantile (median) of the beamformer's postfilter mask and compares it to a threshold to determine whether the desired signal is present. A hold is applied to avoid fast switching between states. is_target_present_ has been plotted and looks as expected. The AGC adaptation sounds promising, especially in the cases where the speaker fades in and out of the beam direction.

R=andrew@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/28329005

git-svn-id: http://webrtc.googlecode.com/svn/trunk@8078 4adac7df-926f-26a2-2b94-8c16560cd09d
parent 3e42a8a56a
commit d82f55d2a7
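The detection logic this commit introduces fits in a few lines. Here is a minimal standalone sketch: the constants and the median-plus-hold behavior come from Beamformer::EstimateTargetPresence() in the diff below, while the free-function packaging, parameter names, and caller-owned counter are illustrative only, not the committed code.

#include <algorithm>
#include <cstring>

const float kMaskTargetThreshold = 0.3f;  // Median mask above this => target.
const int kFftSize = 256;
const int kNumFreqBins = kFftSize / 2 + 1;

// Returns true while the target is considered present. |hold_target_blocks|
// is how many blocks the decision is held after the median mask last passed
// the threshold; |interference_blocks_count| is the caller-owned hold counter.
bool EstimateTargetPresence(const float* mask, int length,
                            int hold_target_blocks,
                            int* interference_blocks_count) {
  // Median of the mask via a partial sort of a local copy.
  float sorted_mask[kNumFreqBins];
  std::memcpy(sorted_mask, mask, length * sizeof(*mask));
  const int median_ix = (length + 1) / 2;
  std::nth_element(sorted_mask, sorted_mask + median_ix, sorted_mask + length);
  if (sorted_mask[median_ix] > kMaskTargetThreshold) {
    *interference_blocks_count = 0;  // Target seen: restart the hold window.
    return true;
  }
  // Not seen this block: keep reporting "present" until the hold expires.
  return (*interference_blocks_count)++ < hold_target_blocks;
}

AudioProcessingImpl then gates AGC adaptation on the resulting flag, as the ProcessStreamLocked() hunk below shows.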
@@ -137,11 +137,16 @@ class GainControlForNewAgc : public GainControl, public VolumeCallbacks {
 AudioProcessing* AudioProcessing::Create() {
   Config config;
-  return Create(config);
+  return Create(config, nullptr);
 }
 
 AudioProcessing* AudioProcessing::Create(const Config& config) {
-  AudioProcessingImpl* apm = new AudioProcessingImpl(config);
+  return Create(config, nullptr);
+}
+
+AudioProcessing* AudioProcessing::Create(const Config& config,
+                                         Beamformer* beamformer) {
+  AudioProcessingImpl* apm = new AudioProcessingImpl(config, beamformer);
   if (apm->Initialize() != kNoError) {
     delete apm;
     apm = NULL;
@@ -151,6 +156,10 @@ AudioProcessing* AudioProcessing::Create(const Config& config) {
 }
 
 AudioProcessingImpl::AudioProcessingImpl(const Config& config)
+    : AudioProcessingImpl(config, nullptr) {}
+
+AudioProcessingImpl::AudioProcessingImpl(const Config& config,
+                                         Beamformer* beamformer)
     : echo_cancellation_(NULL),
       echo_control_mobile_(NULL),
       gain_control_(NULL),
@@ -181,6 +190,7 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config)
 #endif
       transient_suppressor_enabled_(config.Get<ExperimentalNs>().enabled),
       beamformer_enabled_(config.Get<Beamforming>().enabled),
+      beamformer_(beamformer),
       array_geometry_(config.Get<Beamforming>().array_geometry) {
   echo_cancellation_ = new EchoCancellationImpl(this, crit_);
   component_list_.push_back(echo_cancellation_);
@@ -330,6 +340,11 @@ int AudioProcessingImpl::InitializeLocked(int input_sample_rate_hz,
       num_reverse_channels > 2 || num_reverse_channels < 1) {
     return kBadNumberChannelsError;
   }
+  if (beamformer_enabled_ &&
+      (static_cast<size_t>(num_input_channels) != array_geometry_.size() ||
+       num_output_channels > 1)) {
+    return kBadNumberChannelsError;
+  }
 
   fwd_in_format_.set(input_sample_rate_hz, num_input_channels);
   fwd_out_format_.set(output_sample_rate_hz, num_output_channels);
@@ -395,11 +410,6 @@ int AudioProcessingImpl::MaybeInitializeLocked(int input_sample_rate_hz,
       num_reverse_channels == rev_in_format_.num_channels()) {
     return kNoError;
   }
-  if (beamformer_enabled_ &&
-      (static_cast<size_t>(num_input_channels) != array_geometry_.size() ||
-       num_output_channels > 1)) {
-    return kBadNumberChannelsError;
-  }
   return InitializeLocked(input_sample_rate_hz,
                           output_sample_rate_hz,
                           reverse_sample_rate_hz,
@@ -622,7 +632,9 @@ int AudioProcessingImpl::ProcessStreamLocked() {
   RETURN_ON_ERR(echo_control_mobile_->ProcessCaptureAudio(ca));
   RETURN_ON_ERR(voice_detection_->ProcessCaptureAudio(ca));
 
-  if (use_new_agc_ && gain_control_->is_enabled()) {
+  if (use_new_agc_ &&
+      gain_control_->is_enabled() &&
+      (!beamformer_enabled_ || beamformer_->is_target_present())) {
     agc_manager_->Process(ca->split_bands_const(0)[kBand0To8kHz],
                           ca->samples_per_split_channel(),
                           split_rate_);
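That is, when beamforming is disabled the analog AGC adapts exactly as before; with beamforming enabled it adapts only while the beamformer reports the target as present, which, thanks to the hold, includes a short grace period after the mask last passed the threshold.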
@@ -990,9 +1002,10 @@ int AudioProcessingImpl::InitializeTransient() {
 void AudioProcessingImpl::InitializeBeamformer() {
   if (beamformer_enabled_) {
 #ifdef WEBRTC_BEAMFORMER
-    beamformer_.reset(new Beamformer(kChunkSizeMs,
-                                     split_rate_,
-                                     array_geometry_));
+    if (!beamformer_) {
+      beamformer_.reset(new Beamformer(array_geometry_));
+    }
+    beamformer_->Initialize(kChunkSizeMs, split_rate_);
 #else
     assert(false);
 #endif
@@ -86,6 +86,8 @@ class AudioFormat : public AudioRate {
 class AudioProcessingImpl : public AudioProcessing {
  public:
   explicit AudioProcessingImpl(const Config& config);
+  // Only for testing.
+  AudioProcessingImpl(const Config& config, Beamformer* beamformer);
   virtual ~AudioProcessingImpl();
 
   // AudioProcessing methods.
@@ -27,7 +27,6 @@ const float kAlpha = 1.5f;
 // The minimum value a postprocessing mask can take.
 const float kMaskMinimum = 0.01f;
 
-const int kFftSize = 256;
 const float kSpeedOfSoundMeterSeconds = 340;
 
 // For both target and interf angles, 0 is perpendicular to the microphone
@@ -47,8 +46,6 @@ const float kInterfAngleRadians = static_cast<float>(M_PI) / 4.f;
 // Rpsi = Rpsi_angled * kBalance + Rpsi_uniform * (1 - kBalance)
 const float kBalance = 0.2f;
 
-const int kNumFreqBins = kFftSize / 2 + 1;
-
 // TODO(claguna): need comment here.
 const float kBeamwidthConstant = 0.00001f;
 
@@ -61,10 +58,6 @@ const float kBoxcarHalfWidth = 0.001f;
 // that our covariance matrices are positive semidefinite.
 const float kCovUniformGapHalfWidth = 0.001f;
 
-// How many blocks of past masks (including the current block) we save. Saved
-// masks are used for postprocessing such as removing musical noise.
-const int kNumberSavedPostfilterMasks = 2;
-
 // Lower bound on gain decay.
 const float kHalfLifeSeconds = 0.05f;
 
@@ -72,9 +65,15 @@ const float kHalfLifeSeconds = 0.05f;
 const int kMidFrequnecyLowerBoundHz = 250;
 const int kMidFrequencyUpperBoundHz = 400;
 
-const int kHighFrequnecyLowerBoundHz = 4000;
+const int kHighFrequencyLowerBoundHz = 4000;
 const int kHighFrequencyUpperBoundHz = 7000;
 
+// Mask threshold over which the data is considered signal and not interference.
+const float kMaskTargetThreshold = 0.3f;
+// Time in seconds after which the data is considered interference if the mask
+// does not pass |kMaskTargetThreshold|.
+const float kHoldTargetSeconds = 0.25f;
+
 // Does conjugate(|norm_mat|) * |mat| * transpose(|norm_mat|). No extra space is
 // used; to accomplish this, we compute both multiplications in the same loop.
 float Norm(const ComplexMatrix<float>& mat,
@@ -126,46 +125,45 @@ int Round(float x) {
 
 }  // namespace
 
-Beamformer::Beamformer(int chunk_size_ms,
-                       int sample_rate_hz,
-                       const std::vector<Point>& array_geometry)
-    : chunk_length_(sample_rate_hz / (1000.f / chunk_size_ms)),
-      window_(new float[kFftSize]),
-      num_input_channels_(array_geometry.size()),
-      sample_rate_hz_(sample_rate_hz),
-      mic_spacing_(MicSpacingFromGeometry(array_geometry)),
-      decay_threshold_(
-          pow(2, (kFftSize / -2.f) / (sample_rate_hz_ * kHalfLifeSeconds))),
-      mid_frequency_lower_bin_bound_(
-          Round(kMidFrequnecyLowerBoundHz * kFftSize / sample_rate_hz_)),
-      mid_frequency_upper_bin_bound_(
-          Round(kMidFrequencyUpperBoundHz * kFftSize / sample_rate_hz_)),
-      high_frequency_lower_bin_bound_(
-          Round(kHighFrequnecyLowerBoundHz * kFftSize / sample_rate_hz_)),
-      high_frequency_upper_bin_bound_(
-          Round(kHighFrequencyUpperBoundHz * kFftSize / sample_rate_hz_)),
-      current_block_ix_(0),
-      previous_block_ix_(-1),
-      postfilter_masks_(new MatrixF[kNumberSavedPostfilterMasks]),
-      delay_sum_masks_(new ComplexMatrixF[kNumFreqBins]),
-      target_cov_mats_(new ComplexMatrixF[kNumFreqBins]),
-      interf_cov_mats_(new ComplexMatrixF[kNumFreqBins]),
-      reflected_interf_cov_mats_(new ComplexMatrixF[kNumFreqBins]),
-      mask_thresholds_(new float[kNumFreqBins]),
-      wave_numbers_(new float[kNumFreqBins]),
-      rxiws_(new float[kNumFreqBins]),
-      rpsiws_(new float[kNumFreqBins]),
-      reflected_rpsiws_(new float[kNumFreqBins]) {
+Beamformer::Beamformer(const std::vector<Point>& array_geometry)
+    : num_input_channels_(array_geometry.size()),
+      mic_spacing_(MicSpacingFromGeometry(array_geometry)) {
+  WindowGenerator::KaiserBesselDerived(kAlpha, kFftSize, window_);
+
+  for (int i = 0; i < kNumberSavedPostfilterMasks; ++i) {
+    postfilter_masks_[i].Resize(1, kNumFreqBins);
+  }
+}
+
+void Beamformer::Initialize(int chunk_size_ms, int sample_rate_hz) {
+  chunk_length_ = sample_rate_hz / (1000.f / chunk_size_ms);
+  sample_rate_hz_ = sample_rate_hz;
+  decay_threshold_ =
+      pow(2, (kFftSize / -2.f) / (sample_rate_hz_ * kHalfLifeSeconds));
+  mid_frequency_lower_bin_bound_ =
+      Round(kMidFrequnecyLowerBoundHz * kFftSize / sample_rate_hz_);
+  mid_frequency_upper_bin_bound_ =
+      Round(kMidFrequencyUpperBoundHz * kFftSize / sample_rate_hz_);
+  high_frequency_lower_bin_bound_ =
+      Round(kHighFrequencyLowerBoundHz * kFftSize / sample_rate_hz_);
+  high_frequency_upper_bin_bound_ =
+      Round(kHighFrequencyUpperBoundHz * kFftSize / sample_rate_hz_);
+  current_block_ix_ = 0;
+  previous_block_ix_ = -1;
+  is_target_present_ = false;
+  hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize;
+  interference_blocks_count_ = hold_target_blocks_;
+
   DCHECK_LE(mid_frequency_upper_bin_bound_, kNumFreqBins);
   DCHECK_LT(mid_frequency_lower_bin_bound_, mid_frequency_upper_bin_bound_);
   DCHECK_LE(high_frequency_upper_bin_bound_, kNumFreqBins);
   DCHECK_LT(high_frequency_lower_bin_bound_, high_frequency_upper_bin_bound_);
 
-  WindowGenerator::KaiserBesselDerived(kAlpha, kFftSize, window_.get());
-
   lapped_transform_.reset(new LappedTransform(num_input_channels_,
                                               1,
                                               chunk_length_,
-                                              window_.get(),
+                                              window_,
                                               kFftSize,
                                               kFftSize / 2,
                                               this));
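Note the unit conversion in the hold setup: blocks advance by kFftSize / 2 samples, so there are 2 * sample_rate_hz / kFftSize blocks per second, and at 16 kHz kHoldTargetSeconds = 0.25 works out to 0.25 * 2 * 16000 / 256 = 31 hold blocks after truncation to int. Starting interference_blocks_count_ at hold_target_blocks_ means no target is reported until the median mask first passes the threshold.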
@@ -196,9 +194,6 @@ Beamformer::Beamformer(int chunk_size_ms,
     reflected_rpsiws_[i] =
         Norm(reflected_interf_cov_mats_[i], delay_sum_masks_[i]);
   }
-  for (int i = 0; i < kNumberSavedPostfilterMasks; ++i) {
-    postfilter_masks_[i].Resize(1, kNumFreqBins);
-  }
 }
 
 void Beamformer::InitDelaySumMasks() {
@@ -379,6 +374,8 @@ void Beamformer::ProcessAudioBlock(const complex_f* const* input,
                            mask_thresholds_[i]);
   }
 
+  EstimateTargetPresence(mask_data, kNumFreqBins);
+
   // Can't access block_index - 1 on the first block.
   if (previous_block_ix_ >= 0) {
     ApplyDecay();
@@ -490,4 +487,18 @@ float Beamformer::MicSpacingFromGeometry(const std::vector<Point>& geometry) {
   return sqrt(mic_spacing);
 }
 
+void Beamformer::EstimateTargetPresence(float* mask, int length) {
+  memcpy(sorted_mask_, mask, kNumFreqBins * sizeof(*mask));
+  const int median_ix = (length + 1) / 2;
+  std::nth_element(sorted_mask_,
+                   sorted_mask_ + median_ix,
+                   sorted_mask_ + length);
+  if (sorted_mask_[median_ix] > kMaskTargetThreshold) {
+    is_target_present_ = true;
+    interference_blocks_count_ = 0;
+  } else {
+    is_target_present_ = interference_blocks_count_++ < hold_target_blocks_;
+  }
+}
+
 }  // namespace webrtc
@@ -29,22 +29,29 @@ class Beamformer : public LappedTransform::Callback {
  public:
   // At the moment it only accepts uniform linear microphone arrays. Using the
   // first microphone as a reference position [0, 0, 0] is a natural choice.
-  Beamformer(int chunk_size_ms,
+  explicit Beamformer(const std::vector<Point>& array_geometry);
+  virtual ~Beamformer() {};
+
   // Sample rate corresponds to the lower band.
-             int sample_rate_hz,
-             const std::vector<Point>& array_geometry);
+  // Needs to be called before the Beamformer can be used.
+  virtual void Initialize(int chunk_size_ms, int sample_rate_hz);
 
   // Process one time-domain chunk of audio. The audio can be separated into
   // two signals by frequency, with the higher half passed in as the second
   // parameter. Use NULL for |high_pass_split_input| if you only have one
   // audio signal. The number of frames and channels must correspond to the
   // ctor parameters. The same signal can be passed in as |input| and |output|.
-  void ProcessChunk(const float* const* input,
+  virtual void ProcessChunk(const float* const* input,
                     const float* const* high_pass_split_input,
                     int num_input_channels,
                     int num_frames_per_band,
                     float* const* output,
                     float* const* high_pass_split_output);
+  // After processing each block |is_target_present_| is set to true if the
+  // target signal is present and to false otherwise. This method can be called
+  // to know if the data is target signal or interference and process it
+  // accordingly.
+  virtual bool is_target_present() { return is_target_present_; }
 
  protected:
   // Process one frequency-domain block of audio. This is where the fun
@@ -53,7 +60,7 @@ class Beamformer : public LappedTransform::Callback {
                                  int num_input_channels,
                                  int num_freq_bins,
                                  int num_output_channels,
-                                 complex<float>* const* output);
+                                 complex<float>* const* output) override;
 
  private:
   typedef Matrix<float> MatrixF;
@@ -93,23 +100,30 @@ class Beamformer : public LappedTransform::Callback {
   void ApplyMasks(const complex_f* const* input, complex_f* const* output);
 
   float MicSpacingFromGeometry(const std::vector<Point>& array_geometry);
+  void EstimateTargetPresence(float* mask, int length);
+
+  static const int kFftSize = 256;
+  static const int kNumFreqBins = kFftSize / 2 + 1;
+  // How many blocks of past masks (including the current block) we save. Saved
+  // masks are used for postprocessing such as removing musical noise.
+  static const int kNumberSavedPostfilterMasks = 2;
 
   // Deals with the fft transform and blocking.
-  const int chunk_length_;
+  int chunk_length_;
   scoped_ptr<LappedTransform> lapped_transform_;
-  scoped_ptr<float[]> window_;
+  float window_[kFftSize];
 
   // Parameters exposed to the user.
   const int num_input_channels_;
-  const int sample_rate_hz_;
+  int sample_rate_hz_;
   const float mic_spacing_;
 
   // Calculated based on user-input and constants in the .cc file.
-  const float decay_threshold_;
-  const int mid_frequency_lower_bin_bound_;
-  const int mid_frequency_upper_bin_bound_;
-  const int high_frequency_lower_bin_bound_;
-  const int high_frequency_upper_bin_bound_;
+  float decay_threshold_;
+  int mid_frequency_lower_bin_bound_;
+  int mid_frequency_upper_bin_bound_;
+  int high_frequency_lower_bin_bound_;
+  int high_frequency_upper_bin_bound_;
 
   // Indices into |postfilter_masks_|.
   int current_block_ix_;
@@ -117,29 +131,30 @@ class Beamformer : public LappedTransform::Callback {
 
   // Old masks are saved in this ring buffer for smoothing. Array of length
   // |kNumberSavedMasks| matrix of size 1 x |kNumFreqBins|.
-  scoped_ptr<MatrixF[]> postfilter_masks_;
+  MatrixF postfilter_masks_[kNumberSavedPostfilterMasks];
+  float sorted_mask_[kNumFreqBins];
 
   // Array of length |kNumFreqBins|, Matrix of size |1| x |num_channels_|.
-  scoped_ptr<ComplexMatrixF[]> delay_sum_masks_;
+  ComplexMatrixF delay_sum_masks_[kNumFreqBins];
 
   // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x
   // |num_input_channels_|.
-  scoped_ptr<ComplexMatrixF[]> target_cov_mats_;
+  ComplexMatrixF target_cov_mats_[kNumFreqBins];
 
   // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x
   // |num_input_channels_|.
-  scoped_ptr<ComplexMatrixF[]> interf_cov_mats_;
-  scoped_ptr<ComplexMatrixF[]> reflected_interf_cov_mats_;
+  ComplexMatrixF interf_cov_mats_[kNumFreqBins];
+  ComplexMatrixF reflected_interf_cov_mats_[kNumFreqBins];
 
   // Of length |kNumFreqBins|.
-  scoped_ptr<float[]> mask_thresholds_;
-  scoped_ptr<float[]> wave_numbers_;
+  float mask_thresholds_[kNumFreqBins];
+  float wave_numbers_[kNumFreqBins];
 
   // Preallocated for ProcessAudioBlock()
   // Of length |kNumFreqBins|.
-  scoped_ptr<float[]> rxiws_;
-  scoped_ptr<float[]> rpsiws_;
-  scoped_ptr<float[]> reflected_rpsiws_;
+  float rxiws_[kNumFreqBins];
+  float rpsiws_[kNumFreqBins];
+  float reflected_rpsiws_[kNumFreqBins];
 
   // The microphone normalization factor.
   ComplexMatrixF eig_m_;
@@ -148,6 +163,14 @@ class Beamformer : public LappedTransform::Callback {
   bool high_pass_exists_;
   int num_blocks_in_this_chunk_;
   float high_pass_postfilter_mask_;
+
+  // True when the target signal is present.
+  bool is_target_present_;
+  // Number of blocks after which the data is considered interference if the
+  // mask does not pass |kMaskTargetThreshold|.
+  int hold_target_blocks_;
+  // Number of blocks since the last mask that passed |kMaskTargetThreshold|.
+  int interference_blocks_count_;
 };
 
 }  // namespace webrtc
@@ -59,9 +59,8 @@ int main(int argc, char* argv[]) {
   for (int i = 0; i < FLAGS_num_input_channels; ++i) {
     array_geometry.push_back(webrtc::Point(i * FLAGS_mic_spacing, 0.f, 0.f));
   }
-  webrtc::Beamformer bf(kChunkTimeMilliseconds,
-                        FLAGS_sample_rate,
-                        array_geometry);
+  webrtc::Beamformer bf(array_geometry);
+  bf.Initialize(kChunkTimeMilliseconds, FLAGS_sample_rate);
   while (true) {
     size_t samples_read = webrtc::PcmReadToFloat(read_file,
                                                  kInputSamplesPerChunk,
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/beamformer/mock_beamformer.h"
+
+#include <vector>
+
+namespace webrtc {
+
+MockBeamformer::MockBeamformer(const std::vector<Point>& array_geometry)
+    : Beamformer(array_geometry) {}
+
+MockBeamformer::~MockBeamformer() {}
+
+}  // namespace webrtc
webrtc/modules/audio_processing/beamformer/mock_beamformer.h (new file, 38 lines)
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MOCK_BEAMFORMER_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MOCK_BEAMFORMER_H_
+
+#include <vector>
+
+#include "testing/gmock/include/gmock/gmock.h"
+#include "webrtc/modules/audio_processing/beamformer/beamformer.h"
+
+namespace webrtc {
+
+class MockBeamformer : public Beamformer {
+ public:
+  explicit MockBeamformer(const std::vector<Point>& array_geometry);
+  ~MockBeamformer() override;
+
+  MOCK_METHOD2(Initialize, void(int chunk_size_ms, int sample_rate_hz));
+  MOCK_METHOD6(ProcessChunk, void(const float* const* input,
+                                  const float* const* high_pass_split_input,
+                                  int num_input_channels,
+                                  int num_frames_per_band,
+                                  float* const* output,
+                                  float* const* high_pass_split_output));
+  MOCK_METHOD0(is_target_present, bool());
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MOCK_BEAMFORMER_H_
@@ -24,6 +24,7 @@ struct AecCore;
 namespace webrtc {
 
 class AudioFrame;
+class Beamformer;
 class EchoCancellation;
 class EchoControlMobile;
 class GainControl;
@@ -199,6 +200,8 @@ class AudioProcessing {
   static AudioProcessing* Create();
   // Allows passing in an optional configuration at create-time.
   static AudioProcessing* Create(const Config& config);
+  // Only for testing.
+  static AudioProcessing* Create(const Config& config, Beamformer* beamformer);
   virtual ~AudioProcessing() {}
 
   // Initializes internal states, while retaining all user settings. This
@@ -18,6 +18,7 @@
 #include "webrtc/common_audio/resampler/include/push_resampler.h"
 #include "webrtc/common_audio/resampler/push_sinc_resampler.h"
 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_processing/beamformer/mock_beamformer.h"
 #include "webrtc/modules/audio_processing/common.h"
 #include "webrtc/modules/audio_processing/include/audio_processing.h"
 #include "webrtc/modules/audio_processing/test/test_utils.h"
@@ -278,6 +279,35 @@ void OpenFileAndReadMessage(const std::string filename,
   fclose(file);
 }
 
+// Reads a 10 ms chunk of int16 interleaved audio from the given (assumed
+// stereo) file, converts to deinterleaved float (optionally downmixing) and
+// returns the result in |cb|. Returns false if the file ended (or on error) and
+// true otherwise.
+//
+// |int_data| and |float_data| are just temporary space that must be
+// sufficiently large to hold the 10 ms chunk.
+bool ReadChunk(FILE* file, int16_t* int_data, float* float_data,
+               ChannelBuffer<float>* cb) {
+  // The files always contain stereo audio.
+  size_t frame_size = cb->samples_per_channel() * 2;
+  size_t read_count = fread(int_data, sizeof(int16_t), frame_size, file);
+  if (read_count != frame_size) {
+    // Check that the file really ended.
+    assert(feof(file));
+    return false;  // This is expected.
+  }
+
+  S16ToFloat(int_data, frame_size, float_data);
+  if (cb->num_channels() == 1) {
+    MixStereoToMono(float_data, cb->data(), cb->samples_per_channel());
+  } else {
+    Deinterleave(float_data, cb->samples_per_channel(), 2,
+                 cb->channels());
+  }
+
+  return true;
+}
+
 class ApmTest : public ::testing::Test {
  protected:
   ApmTest();
@@ -1164,6 +1194,87 @@ TEST_F(ApmTest, ManualVolumeChangeIsPossible) {
   }
 }
 
+#if !defined(WEBRTC_ANDROID) && !defined(WEBRTC_IOS)
+TEST_F(ApmTest, AgcOnlyAdaptsWhenTargetSignalIsPresent) {
+  const int kSampleRateHz = 16000;
+  const int kSamplesPerChannel =
+      AudioProcessing::kChunkSizeMs * kSampleRateHz / 1000;
+  const int kNumInputChannels = 2;
+  const int kNumOutputChannels = 1;
+  const int kNumChunks = 700;
+  const float kScaleFactor = 0.25f;
+  Config config;
+  std::vector<webrtc::Point> geometry;
+  geometry.push_back(webrtc::Point(0.f, 0.f, 0.f));
+  geometry.push_back(webrtc::Point(0.05f, 0.f, 0.f));
+  config.Set<Beamforming>(new Beamforming(true, geometry));
+  testing::NiceMock<MockBeamformer>* beamformer =
+      new testing::NiceMock<MockBeamformer>(geometry);
+  scoped_ptr<AudioProcessing> apm(AudioProcessing::Create(config, beamformer));
+  EXPECT_EQ(kNoErr, apm->gain_control()->Enable(true));
+  ChannelBuffer<float> src_buf(kSamplesPerChannel, kNumInputChannels);
+  ChannelBuffer<float> dest_buf(kSamplesPerChannel, kNumOutputChannels);
+  const int max_length = kSamplesPerChannel * std::max(kNumInputChannels,
+                                                       kNumOutputChannels);
+  scoped_ptr<int16_t[]> int_data(new int16_t[max_length]);
+  scoped_ptr<float[]> float_data(new float[max_length]);
+  std::string filename = ResourceFilePath("far", kSampleRateHz);
+  FILE* far_file = fopen(filename.c_str(), "rb");
+  ASSERT_TRUE(far_file != NULL) << "Could not open file " << filename << "\n";
+  const int kDefaultVolume = apm->gain_control()->stream_analog_level();
+  const int kDefaultCompressionGain =
+      apm->gain_control()->compression_gain_db();
+  bool is_target = false;
+  EXPECT_CALL(*beamformer, is_target_present())
+      .WillRepeatedly(testing::ReturnPointee(&is_target));
+  for (int i = 0; i < kNumChunks; ++i) {
+    ASSERT_TRUE(ReadChunk(far_file,
+                          int_data.get(),
+                          float_data.get(),
+                          &src_buf));
+    for (int j = 0; j < kNumInputChannels * kSamplesPerChannel; ++j) {
+      src_buf.data()[j] *= kScaleFactor;
+    }
+    EXPECT_EQ(kNoErr,
+              apm->ProcessStream(src_buf.channels(),
+                                 src_buf.samples_per_channel(),
+                                 kSampleRateHz,
+                                 LayoutFromChannels(src_buf.num_channels()),
+                                 kSampleRateHz,
+                                 LayoutFromChannels(dest_buf.num_channels()),
+                                 dest_buf.channels()));
+  }
+  EXPECT_EQ(kDefaultVolume,
+            apm->gain_control()->stream_analog_level());
+  EXPECT_EQ(kDefaultCompressionGain,
+            apm->gain_control()->compression_gain_db());
+  rewind(far_file);
+  is_target = true;
+  for (int i = 0; i < kNumChunks; ++i) {
+    ASSERT_TRUE(ReadChunk(far_file,
+                          int_data.get(),
+                          float_data.get(),
+                          &src_buf));
+    for (int j = 0; j < kNumInputChannels * kSamplesPerChannel; ++j) {
+      src_buf.data()[j] *= kScaleFactor;
+    }
+    EXPECT_EQ(kNoErr,
+              apm->ProcessStream(src_buf.channels(),
+                                 src_buf.samples_per_channel(),
+                                 kSampleRateHz,
+                                 LayoutFromChannels(src_buf.num_channels()),
+                                 kSampleRateHz,
+                                 LayoutFromChannels(dest_buf.num_channels()),
+                                 dest_buf.channels()));
+  }
+  EXPECT_LT(kDefaultVolume,
+            apm->gain_control()->stream_analog_level());
+  EXPECT_LT(kDefaultCompressionGain,
+            apm->gain_control()->compression_gain_db());
+  ASSERT_EQ(0, fclose(far_file));
+}
+#endif
+
 TEST_F(ApmTest, NoiseSuppression) {
   // Test valid suppression levels.
   NoiseSuppression::Level level[] = {
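The test drives the new seam end to end: a NiceMock<MockBeamformer> is injected through Create(config, beamformer), and is_target_present() is steered via testing::ReturnPointee(&is_target). The same quarter-scaled far-end file is processed twice; with the target reported absent the analog level and compression gain must stay at their defaults, and with it present both must have increased.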
@@ -2031,35 +2142,6 @@ TEST_F(ApmTest, NoErrorsWithKeyboardChannel) {
   }
 }
 
-// Reads a 10 ms chunk of int16 interleaved audio from the given (assumed
-// stereo) file, converts to deinterleaved float (optionally downmixing) and
-// returns the result in |cb|. Returns false if the file ended (or on error) and
-// true otherwise.
-//
-// |int_data| and |float_data| are just temporary space that must be
-// sufficiently large to hold the 10 ms chunk.
-bool ReadChunk(FILE* file, int16_t* int_data, float* float_data,
-               ChannelBuffer<float>* cb) {
-  // The files always contain stereo audio.
-  size_t frame_size = cb->samples_per_channel() * 2;
-  size_t read_count = fread(int_data, sizeof(int16_t), frame_size, file);
-  if (read_count != frame_size) {
-    // Check that the file really ended.
-    assert(feof(file));
-    return false;  // This is expected.
-  }
-
-  S16ToFloat(int_data, frame_size, float_data);
-  if (cb->num_channels() == 1) {
-    MixStereoToMono(float_data, cb->data(), cb->samples_per_channel());
-  } else {
-    Deinterleave(float_data, cb->samples_per_channel(), 2,
-                 cb->channels());
-  }
-
-  return true;
-}
-
 // Compares the reference and test arrays over a region around the expected
 // delay. Finds the highest SNR in that region and adds the variance and squared
 // error results to the supplied accumulators.
@@ -180,6 +180,8 @@
         'audio_processing/beamformer/complex_matrix_unittest.cc',
         'audio_processing/beamformer/covariance_matrix_generator_unittest.cc',
        'audio_processing/beamformer/matrix_unittest.cc',
+        'audio_processing/beamformer/mock_beamformer.cc',
+        'audio_processing/beamformer/mock_beamformer.h',
         'audio_processing/beamformer/pcm_utils.cc',
         'audio_processing/beamformer/pcm_utils.h',
         'audio_processing/echo_cancellation_impl_unittest.cc',