Add 48kHz support to Beamformer

The 16-24kHz band is now handled the same way the 8-16kHz band already is.
Tested at a 32kHz sample rate; the output is bit-exact with the output before this CL.
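
For reference, a standalone sketch of the idea (not code from this CL; NumBandsForRate and ApplyMaskToUpperBands are made-up names): the smoothed post-filter mask is applied by delay-and-sum to every split band above the lowest, so 48kHz just adds one more band to the loop, while 32kHz keeps exactly one upper band and therefore stays bit-exact.

// Standalone sketch; NumBandsForRate and ApplyMaskToUpperBands are made-up
// names, not WebRTC APIs.
//
// Number of 16kHz-wide split bands per sample rate: 1 up to 16kHz,
// 2 at 32kHz (0-8, 8-16), 3 at 48kHz (0-8, 8-16, 16-24).
int NumBandsForRate(int sample_rate_hz) {
  return sample_rate_hz <= 16000 ? 1 : sample_rate_hz / 16000;
}

// Delay-and-sum (average across channels) plus a smoothly ramped gain mask,
// applied identically to every band above the lowest. Assumed layout:
// in[band][channel][frame], out[band][channel][frame]; the result is written
// to channel 0 of each upper band.
void ApplyMaskToUpperBands(const float* const* const* in,
                           float* const* const* out,
                           int num_bands,
                           int num_channels,
                           int num_frames,
                           float old_mask,
                           float new_mask) {
  const float ramp_increment = (new_mask - old_mask) / num_frames;
  for (int band = 1; band < num_bands; ++band) {
    float smoothed_mask = old_mask;
    for (int frame = 0; frame < num_frames; ++frame) {
      smoothed_mask += ramp_increment;
      float sum = 0.f;
      for (int ch = 0; ch < num_channels; ++ch) {
        sum += in[band][ch][frame];
      }
      out[band][0][frame] = sum / num_channels * smoothed_mask;
    }
  }
}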

BUG=webrtc:3146
R=andrew@webrtc.org, kwiberg@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/35159004

Cr-Commit-Position: refs/heads/master@{#8522}
git-svn-id: http://webrtc.googlecode.com/svn/trunk@8522 4adac7df-926f-26a2-2b94-8c16560cd09d
Author: aluebs@webrtc.org
Date: 2015-02-26 21:52:20 +00:00
Commit: 3aca0b0b31 (parent: 9650ab4d59)
7 changed files with 104 additions and 62 deletions


@@ -244,6 +244,24 @@ int16_t* const* AudioBuffer::split_channels(Band band) {
   }
 }
 
+ChannelBuffer<int16_t>* AudioBuffer::data() {
+  mixed_low_pass_valid_ = false;
+  return data_->ibuf();
+}
+
+const ChannelBuffer<int16_t>* AudioBuffer::data() const {
+  return data_->ibuf_const();
+}
+
+ChannelBuffer<int16_t>* AudioBuffer::split_data() {
+  mixed_low_pass_valid_ = false;
+  return split_data_.get() ? split_data_->ibuf() : data_->ibuf();
+}
+
+const ChannelBuffer<int16_t>* AudioBuffer::split_data() const {
+  return split_data_.get() ? split_data_->ibuf_const() : data_->ibuf_const();
+}
+
 const float* const* AudioBuffer::channels_const_f() const {
   return data_->fbuf_const()->channels();
 }
@@ -283,6 +301,24 @@ float* const* AudioBuffer::split_channels_f(Band band) {
   }
 }
 
+ChannelBuffer<float>* AudioBuffer::data_f() {
+  mixed_low_pass_valid_ = false;
+  return data_->fbuf();
+}
+
+const ChannelBuffer<float>* AudioBuffer::data_f() const {
+  return data_->fbuf_const();
+}
+
+ChannelBuffer<float>* AudioBuffer::split_data_f() {
+  mixed_low_pass_valid_ = false;
+  return split_data_.get() ? split_data_->fbuf() : data_->fbuf();
+}
+
+const ChannelBuffer<float>* AudioBuffer::split_data_f() const {
+  return split_data_.get() ? split_data_->fbuf_const() : data_->fbuf_const();
+}
+
 const int16_t* AudioBuffer::mixed_low_pass_data() {
   // Currently only mixing stereo to mono is supported.
   assert(num_proc_channels_ == 1 || num_proc_channels_ == 2);


@@ -50,30 +50,59 @@ class AudioBuffer {
   int num_keyboard_frames() const;
   int num_bands() const;
 
-  // Sample array accessors. Channels are guaranteed to be stored contiguously
-  // in memory. Prefer to use the const variants of each accessor when
-  // possible, since they incur less float<->int16 conversion overhead.
+  // Returns a pointer array to the full-band channels.
+  // Usage:
+  // channels()[channel][sample].
+  // Where:
+  // 0 <= channel < |num_proc_channels_|
+  // 0 <= sample < |proc_num_frames_|
   int16_t* const* channels();
   const int16_t* const* channels_const() const;
+  float* const* channels_f();
+  const float* const* channels_const_f() const;
+
+  // Returns a pointer array to the bands for a specific channel.
+  // Usage:
+  // split_bands(channel)[band][sample].
+  // Where:
+  // 0 <= channel < |num_proc_channels_|
+  // 0 <= band < |num_bands_|
+  // 0 <= sample < |num_split_frames_|
   int16_t* const* split_bands(int channel);
   const int16_t* const* split_bands_const(int channel) const;
+  float* const* split_bands_f(int channel);
+  const float* const* split_bands_const_f(int channel) const;
+
+  // Returns a pointer array to the channels for a specific band.
+  // Usage:
+  // split_channels(band)[channel][sample].
+  // Where:
+  // 0 <= band < |num_bands_|
+  // 0 <= channel < |num_proc_channels_|
+  // 0 <= sample < |num_split_frames_|
   int16_t* const* split_channels(Band band);
   const int16_t* const* split_channels_const(Band band) const;
+  float* const* split_channels_f(Band band);
+  const float* const* split_channels_const_f(Band band) const;
+
+  // Returns a pointer to the ChannelBuffer that encapsulates the full-band
+  // data.
+  ChannelBuffer<int16_t>* data();
+  const ChannelBuffer<int16_t>* data() const;
+  ChannelBuffer<float>* data_f();
+  const ChannelBuffer<float>* data_f() const;
+
+  // Returns a pointer to the ChannelBuffer that encapsulates the split data.
+  ChannelBuffer<int16_t>* split_data();
+  const ChannelBuffer<int16_t>* split_data() const;
+  ChannelBuffer<float>* split_data_f();
+  const ChannelBuffer<float>* split_data_f() const;
 
   // Returns a pointer to the low-pass data downmixed to mono. If this data
   // isn't already available it re-calculates it.
   const int16_t* mixed_low_pass_data();
   const int16_t* low_pass_reference(int channel) const;
 
-  // Float versions of the accessors, with automatic conversion back and forth
-  // as necessary. The range of the numbers are the same as for int16_t.
-  float* const* channels_f();
-  const float* const* channels_const_f() const;
-  float* const* split_bands_f(int channel);
-  const float* const* split_bands_const_f(int channel) const;
-  float* const* split_channels_f(Band band);
-  const float* const* split_channels_const_f(Band band) const;
-
   const float* keyboard_data() const;
 
   void set_activity(AudioFrame::VADActivity activity);
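
A hedged usage sketch of the accessors documented above (not part of this CL). It assumes the include path, that the buffer was configured for 48kHz input so num_bands() == 3, and that the Band enum gains a kBand16To24kHz value for the new band:

// Usage sketch only; the include path is assumed, and kBand16To24kHz is
// assumed to exist alongside kBand0To8kHz and kBand8To16kHz.
#include "webrtc/modules/audio_processing/audio_buffer.h"

namespace webrtc {

// Zeroes the highest split band of every channel, using the
// split_channels_f(band)[channel][sample] layout documented above.
void MuteHighestBand(AudioBuffer* audio) {
  if (audio->num_bands() < 3)
    return;  // The 16-24kHz band only exists for 48kHz streams.
  float* const* high_band = audio->split_channels_f(kBand16To24kHz);
  for (int ch = 0; ch < audio->num_channels(); ++ch) {
    for (int i = 0; i < audio->num_frames_per_band(); ++i) {
      high_band[ch][i] = 0.f;
    }
  }
}

}  // namespace webrtc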


@@ -610,12 +610,7 @@ int AudioProcessingImpl::ProcessStreamLocked() {
 
 #ifdef WEBRTC_BEAMFORMER
   if (beamformer_enabled_) {
-    beamformer_->ProcessChunk(ca->split_channels_const_f(kBand0To8kHz),
-                              ca->split_channels_const_f(kBand8To16kHz),
-                              ca->num_channels(),
-                              ca->num_frames_per_band(),
-                              ca->split_channels_f(kBand0To8kHz),
-                              ca->split_channels_f(kBand8To16kHz));
+    beamformer_->ProcessChunk(ca->split_data_f(), ca->split_data_f());
     ca->set_num_channels(1);
   }
 #endif


@@ -295,36 +295,32 @@ void Beamformer::InitInterfCovMats() {
   }
 }
 
-void Beamformer::ProcessChunk(const float* const* input,
-                              const float* const* high_pass_split_input,
-                              int num_input_channels,
-                              int num_frames_per_band,
-                              float* const* output,
-                              float* const* high_pass_split_output) {
-  CHECK_EQ(num_input_channels, num_input_channels_);
-  CHECK_EQ(num_frames_per_band, chunk_length_);
+void Beamformer::ProcessChunk(const ChannelBuffer<float>* input,
+                              ChannelBuffer<float>* output) {
+  DCHECK_EQ(input->num_channels(), num_input_channels_);
+  DCHECK_EQ(input->num_frames_per_band(), chunk_length_);
 
   float old_high_pass_mask = high_pass_postfilter_mask_;
-  lapped_transform_->ProcessChunk(input, output);
-
+  lapped_transform_->ProcessChunk(input->channels(0), output->channels(0));
+  // Ramp up/down for smoothing. 1 mask per 10ms results in audible
+  // discontinuities.
+  const float ramp_increment =
+      (high_pass_postfilter_mask_ - old_high_pass_mask) /
+      input->num_frames_per_band();
   // Apply delay and sum and post-filter in the time domain. WARNING: only works
   // because delay-and-sum is not frequency dependent.
-  if (high_pass_split_input != NULL) {
-    // Ramp up/down for smoothing. 1 mask per 10ms results in audible
-    // discontinuities.
-    float ramp_inc =
-        (high_pass_postfilter_mask_ - old_high_pass_mask) / num_frames_per_band;
-    for (int i = 0; i < num_frames_per_band; ++i) {
-      old_high_pass_mask += ramp_inc;
+  for (int i = 1; i < input->num_bands(); ++i) {
+    float smoothed_mask = old_high_pass_mask;
+    for (int j = 0; j < input->num_frames_per_band(); ++j) {
+      smoothed_mask += ramp_increment;
 
       // Applying the delay and sum (at zero degrees, this is equivalent to
       // averaging).
       float sum = 0.f;
-      for (int j = 0; j < num_input_channels; ++j) {
-        sum += high_pass_split_input[j][i];
+      for (int k = 0; k < input->num_channels(); ++k) {
+        sum += input->channels(i)[k][j];
       }
-      high_pass_split_output[0][i] =
-          sum / num_input_channels * old_high_pass_mask;
+      output->channels(i)[0][j] = sum / input->num_channels() * smoothed_mask;
     }
   }
 }


@@ -36,17 +36,12 @@ class Beamformer : public LappedTransform::Callback {
   // Needs to be called before the Beamformer can be used.
   virtual void Initialize(int chunk_size_ms, int sample_rate_hz);
 
-  // Process one time-domain chunk of audio. The audio can be separated into
-  // two signals by frequency, with the higher half passed in as the second
-  // parameter. Use NULL for |high_pass_split_input| if you only have one
-  // audio signal. The number of frames and channels must correspond to the
-  // ctor parameters. The same signal can be passed in as |input| and |output|.
-  virtual void ProcessChunk(const float* const* input,
-                            const float* const* high_pass_split_input,
-                            int num_input_channels,
-                            int num_frames_per_band,
-                            float* const* output,
-                            float* const* high_pass_split_output);
+  // Process one time-domain chunk of audio. The audio is expected to be split
+  // into frequency bands inside the ChannelBuffer. The number of frames and
+  // channels must correspond to the constructor parameters. The same
+  // ChannelBuffer can be passed in as |input| and |output|.
+  virtual void ProcessChunk(const ChannelBuffer<float>* input,
+                            ChannelBuffer<float>* output);
   // After processing each block |is_target_present_| is set to true if the
   // target signal is present and to false otherwise. This method can be called
   // to know if the data is target signal or interference and process it
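
A hedged caller-side sketch of the new contract (not from this CL); the include path is assumed, and |bf| and |chunk| are assumed to have been set up elsewhere with matching sample rate and channel count:

// Caller-side sketch; the include path is assumed.
#include "webrtc/modules/audio_processing/beamformer/beamformer.h"

namespace webrtc {

// Processes one chunk in place; the comment above explicitly allows passing
// the same ChannelBuffer as |input| and |output|.
void RunBeamformerInPlace(Beamformer* bf, ChannelBuffer<float>* chunk) {
  bf->ProcessChunk(chunk, chunk);
  if (!bf->is_target_present()) {
    // A caller could, for example, attenuate or skip further processing here.
  }
}

}  // namespace webrtc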


@@ -71,12 +71,7 @@ int main(int argc, char* argv[]) {
       break;
     }
 
-    bf.ProcessChunk(captured_audio_cb.channels(),
-                    NULL,
-                    FLAGS_num_input_channels,
-                    kChunkSize,
-                    captured_audio_cb.channels(),
-                    NULL);
+    bf.ProcessChunk(&captured_audio_cb, &captured_audio_cb);
     webrtc::PcmWriteFromFloat(
         write_file, kChunkSize, 1, captured_audio_cb.channels());
   }


@@ -24,12 +24,8 @@ class MockBeamformer : public Beamformer {
   ~MockBeamformer() override;
 
   MOCK_METHOD2(Initialize, void(int chunk_size_ms, int sample_rate_hz));
-  MOCK_METHOD6(ProcessChunk, void(const float* const* input,
-                                  const float* const* high_pass_split_input,
-                                  int num_input_channels,
-                                  int num_frames_per_band,
-                                  float* const* output,
-                                  float* const* high_pass_split_output));
+  MOCK_METHOD2(ProcessChunk, void(const ChannelBuffer<float>* input,
+                                  ChannelBuffer<float>* output));
   MOCK_METHOD0(is_target_present, bool());
 };
 
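
A hedged sketch of how a test might exercise the new two-argument mock (not part of this CL); EXPECT_CALL and ::testing::_ are standard gmock, while the include paths and surrounding test wiring are assumed:

// Test-side sketch; include paths are assumed.
#include "testing/gmock/include/gmock/gmock.h"
#include "webrtc/modules/audio_processing/beamformer/mock_beamformer.h"

namespace webrtc {

// Expects exactly one chunk to go through the new
// (const ChannelBuffer<float>*, ChannelBuffer<float>*) overload.
void ExpectSingleChunk(MockBeamformer* mock) {
  EXPECT_CALL(*mock, ProcessChunk(::testing::_, ::testing::_)).Times(1);
}

}  // namespace webrtc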