Add 48kHz support to Beamformer
Doing something similar for the band 16-24kHz to what is done for the band 8-16kHz. Tested for 32kHz sample rate and the output is bit-exact with how it was before this CL. BUG=webrtc:3146 R=andrew@webrtc.org, kwiberg@webrtc.org Review URL: https://webrtc-codereview.appspot.com/35159004 Cr-Commit-Position: refs/heads/master@{#8522} git-svn-id: http://webrtc.googlecode.com/svn/trunk@8522 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
parent
9650ab4d59
commit
3aca0b0b31
@ -244,6 +244,24 @@ int16_t* const* AudioBuffer::split_channels(Band band) {
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the mutable int16_t ChannelBuffer obtained from data_->ibuf().
ChannelBuffer<int16_t>* AudioBuffer::data() {
|
||||
// Clears the mixed low-pass validity flag before handing out the mutable
// buffer. NOTE(review): presumably because the caller may modify the samples
// through the returned pointer -- confirm against mixed_low_pass_data().
mixed_low_pass_valid_ = false;
|
||||
return data_->ibuf();
|
||||
}
|
||||
|
||||
// Const overload: returns the read-only int16_t ChannelBuffer obtained from
// data_->ibuf_const(). Unlike the non-const overload, it does not touch
// mixed_low_pass_valid_.
const ChannelBuffer<int16_t>* AudioBuffer::data() const {
|
||||
return data_->ibuf_const();
|
||||
}
|
||||
|
||||
// Returns the mutable int16_t ChannelBuffer of split_data_ when split_data_
// holds an object; otherwise falls back to the int16_t buffer of data_.
ChannelBuffer<int16_t>* AudioBuffer::split_data() {
|
||||
// Clears the mixed low-pass validity flag before handing out the mutable
// buffer. NOTE(review): presumably because the caller may modify the samples
// through the returned pointer -- confirm against mixed_low_pass_data().
mixed_low_pass_valid_ = false;
|
||||
return split_data_.get() ? split_data_->ibuf() : data_->ibuf();
|
||||
}
|
||||
|
||||
// Const overload: returns the read-only int16_t ChannelBuffer of split_data_
// when split_data_ holds an object; otherwise falls back to the read-only
// int16_t buffer of data_. Does not touch mixed_low_pass_valid_.
const ChannelBuffer<int16_t>* AudioBuffer::split_data() const {
|
||||
return split_data_.get() ? split_data_->ibuf_const() : data_->ibuf_const();
|
||||
}
|
||||
|
||||
// Returns the read-only array of float channel pointers taken from
// data_->fbuf_const()->channels().
const float* const* AudioBuffer::channels_const_f() const {
|
||||
return data_->fbuf_const()->channels();
|
||||
}
|
||||
@ -283,6 +301,24 @@ float* const* AudioBuffer::split_channels_f(Band band) {
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the mutable float ChannelBuffer obtained from data_->fbuf().
ChannelBuffer<float>* AudioBuffer::data_f() {
|
||||
// Clears the mixed low-pass validity flag before handing out the mutable
// buffer. NOTE(review): presumably because the caller may modify the samples
// through the returned pointer -- confirm against mixed_low_pass_data().
mixed_low_pass_valid_ = false;
|
||||
return data_->fbuf();
|
||||
}
|
||||
|
||||
// Const overload: returns the read-only float ChannelBuffer obtained from
// data_->fbuf_const(). Unlike the non-const overload, it does not touch
// mixed_low_pass_valid_.
const ChannelBuffer<float>* AudioBuffer::data_f() const {
|
||||
return data_->fbuf_const();
|
||||
}
|
||||
|
||||
// Returns the mutable float ChannelBuffer of split_data_ when split_data_
// holds an object; otherwise falls back to the float buffer of data_.
ChannelBuffer<float>* AudioBuffer::split_data_f() {
|
||||
// Clears the mixed low-pass validity flag before handing out the mutable
// buffer. NOTE(review): presumably because the caller may modify the samples
// through the returned pointer -- confirm against mixed_low_pass_data().
mixed_low_pass_valid_ = false;
|
||||
return split_data_.get() ? split_data_->fbuf() : data_->fbuf();
|
||||
}
|
||||
|
||||
// Const overload: returns the read-only float ChannelBuffer of split_data_
// when split_data_ holds an object; otherwise falls back to the read-only
// float buffer of data_. Does not touch mixed_low_pass_valid_.
const ChannelBuffer<float>* AudioBuffer::split_data_f() const {
|
||||
return split_data_.get() ? split_data_->fbuf_const() : data_->fbuf_const();
|
||||
}
|
||||
|
||||
const int16_t* AudioBuffer::mixed_low_pass_data() {
|
||||
// Currently only mixing stereo to mono is supported.
|
||||
assert(num_proc_channels_ == 1 || num_proc_channels_ == 2);
|
||||
|
@ -50,30 +50,59 @@ class AudioBuffer {
|
||||
int num_keyboard_frames() const;
|
||||
int num_bands() const;
|
||||
|
||||
// Sample array accessors. Channels are guaranteed to be stored contiguously
|
||||
// in memory. Prefer to use the const variants of each accessor when
|
||||
// possible, since they incur less float<->int16 conversion overhead.
|
||||
// Returns a pointer array to the full-band channels.
|
||||
// Usage:
|
||||
// channels()[channel][sample].
|
||||
// Where:
|
||||
// 0 <= channel < |num_proc_channels_|
|
||||
// 0 <= sample < |proc_num_frames_|
|
||||
int16_t* const* channels();
|
||||
const int16_t* const* channels_const() const;
|
||||
float* const* channels_f();
|
||||
const float* const* channels_const_f() const;
|
||||
|
||||
// Returns a pointer array to the bands for a specific channel.
|
||||
// Usage:
|
||||
// split_bands(channel)[band][sample].
|
||||
// Where:
|
||||
// 0 <= channel < |num_proc_channels_|
|
||||
// 0 <= band < |num_bands_|
|
||||
// 0 <= sample < |num_split_frames_|
|
||||
int16_t* const* split_bands(int channel);
|
||||
const int16_t* const* split_bands_const(int channel) const;
|
||||
float* const* split_bands_f(int channel);
|
||||
const float* const* split_bands_const_f(int channel) const;
|
||||
|
||||
// Returns a pointer array to the channels for a specific band.
|
||||
// Usage:
|
||||
// split_channels(band)[channel][sample].
|
||||
// Where:
|
||||
// 0 <= band < |num_bands_|
|
||||
// 0 <= channel < |num_proc_channels_|
|
||||
// 0 <= sample < |num_split_frames_|
|
||||
int16_t* const* split_channels(Band band);
|
||||
const int16_t* const* split_channels_const(Band band) const;
|
||||
float* const* split_channels_f(Band band);
|
||||
const float* const* split_channels_const_f(Band band) const;
|
||||
|
||||
// Returns a pointer to the ChannelBuffer that encapsulates the full-band
|
||||
// data.
|
||||
ChannelBuffer<int16_t>* data();
|
||||
const ChannelBuffer<int16_t>* data() const;
|
||||
ChannelBuffer<float>* data_f();
|
||||
const ChannelBuffer<float>* data_f() const;
|
||||
|
||||
// Returns a pointer to the ChannelBuffer that encapsulates the split data.
|
||||
ChannelBuffer<int16_t>* split_data();
|
||||
const ChannelBuffer<int16_t>* split_data() const;
|
||||
ChannelBuffer<float>* split_data_f();
|
||||
const ChannelBuffer<float>* split_data_f() const;
|
||||
|
||||
// Returns a pointer to the low-pass data downmixed to mono. If this data
|
||||
// isn't already available it re-calculates it.
|
||||
const int16_t* mixed_low_pass_data();
|
||||
const int16_t* low_pass_reference(int channel) const;
|
||||
|
||||
// Float versions of the accessors, with automatic conversion back and forth
|
||||
// as necessary. The range of the numbers is the same as for int16_t.
|
||||
float* const* channels_f();
|
||||
const float* const* channels_const_f() const;
|
||||
float* const* split_bands_f(int channel);
|
||||
const float* const* split_bands_const_f(int channel) const;
|
||||
float* const* split_channels_f(Band band);
|
||||
const float* const* split_channels_const_f(Band band) const;
|
||||
|
||||
const float* keyboard_data() const;
|
||||
|
||||
void set_activity(AudioFrame::VADActivity activity);
|
||||
|
@ -610,12 +610,7 @@ int AudioProcessingImpl::ProcessStreamLocked() {
|
||||
|
||||
#ifdef WEBRTC_BEAMFORMER
|
||||
if (beamformer_enabled_) {
|
||||
beamformer_->ProcessChunk(ca->split_channels_const_f(kBand0To8kHz),
|
||||
ca->split_channels_const_f(kBand8To16kHz),
|
||||
ca->num_channels(),
|
||||
ca->num_frames_per_band(),
|
||||
ca->split_channels_f(kBand0To8kHz),
|
||||
ca->split_channels_f(kBand8To16kHz));
|
||||
beamformer_->ProcessChunk(ca->split_data_f(), ca->split_data_f());
|
||||
ca->set_num_channels(1);
|
||||
}
|
||||
#endif
|
||||
|
@ -295,36 +295,32 @@ void Beamformer::InitInterfCovMats() {
|
||||
}
|
||||
}
|
||||
|
||||
void Beamformer::ProcessChunk(const float* const* input,
|
||||
const float* const* high_pass_split_input,
|
||||
int num_input_channels,
|
||||
int num_frames_per_band,
|
||||
float* const* output,
|
||||
float* const* high_pass_split_output) {
|
||||
CHECK_EQ(num_input_channels, num_input_channels_);
|
||||
CHECK_EQ(num_frames_per_band, chunk_length_);
|
||||
void Beamformer::ProcessChunk(const ChannelBuffer<float>* input,
|
||||
ChannelBuffer<float>* output) {
|
||||
DCHECK_EQ(input->num_channels(), num_input_channels_);
|
||||
DCHECK_EQ(input->num_frames_per_band(), chunk_length_);
|
||||
|
||||
float old_high_pass_mask = high_pass_postfilter_mask_;
|
||||
lapped_transform_->ProcessChunk(input, output);
|
||||
|
||||
lapped_transform_->ProcessChunk(input->channels(0), output->channels(0));
|
||||
// Ramp up/down for smoothing. 1 mask per 10ms results in audible
|
||||
// discontinuities.
|
||||
const float ramp_increment =
|
||||
(high_pass_postfilter_mask_ - old_high_pass_mask) /
|
||||
input->num_frames_per_band();
|
||||
// Apply delay and sum and post-filter in the time domain. WARNING: only works
|
||||
// because delay-and-sum is not frequency dependent.
|
||||
if (high_pass_split_input != NULL) {
|
||||
// Ramp up/down for smoothing. 1 mask per 10ms results in audible
|
||||
// discontinuities.
|
||||
float ramp_inc =
|
||||
(high_pass_postfilter_mask_ - old_high_pass_mask) / num_frames_per_band;
|
||||
for (int i = 0; i < num_frames_per_band; ++i) {
|
||||
old_high_pass_mask += ramp_inc;
|
||||
for (int i = 1; i < input->num_bands(); ++i) {
|
||||
float smoothed_mask = old_high_pass_mask;
|
||||
for (int j = 0; j < input->num_frames_per_band(); ++j) {
|
||||
smoothed_mask += ramp_increment;
|
||||
|
||||
// Applying the delay and sum (at zero degrees, this is equivalent to
|
||||
// averaging).
|
||||
float sum = 0.f;
|
||||
for (int j = 0; j < num_input_channels; ++j) {
|
||||
sum += high_pass_split_input[j][i];
|
||||
for (int k = 0; k < input->num_channels(); ++k) {
|
||||
sum += input->channels(i)[k][j];
|
||||
}
|
||||
high_pass_split_output[0][i] =
|
||||
sum / num_input_channels * old_high_pass_mask;
|
||||
output->channels(i)[0][j] = sum / input->num_channels() * smoothed_mask;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -36,17 +36,12 @@ class Beamformer : public LappedTransform::Callback {
|
||||
// Needs to be called before the Beamformer can be used.
|
||||
virtual void Initialize(int chunk_size_ms, int sample_rate_hz);
|
||||
|
||||
// Process one time-domain chunk of audio. The audio can be separated into
|
||||
// two signals by frequency, with the higher half passed in as the second
|
||||
// parameter. Use NULL for |high_pass_split_input| if you only have one
|
||||
// audio signal. The number of frames and channels must correspond to the
|
||||
// ctor parameters. The same signal can be passed in as |input| and |output|.
|
||||
virtual void ProcessChunk(const float* const* input,
|
||||
const float* const* high_pass_split_input,
|
||||
int num_input_channels,
|
||||
int num_frames_per_band,
|
||||
float* const* output,
|
||||
float* const* high_pass_split_output);
|
||||
// Process one time-domain chunk of audio. The audio is expected to be split
|
||||
// into frequency bands inside the ChannelBuffer. The number of frames and
|
||||
// channels must correspond to the constructor parameters. The same
|
||||
// ChannelBuffer can be passed in as |input| and |output|.
|
||||
virtual void ProcessChunk(const ChannelBuffer<float>* input,
|
||||
ChannelBuffer<float>* output);
|
||||
// After processing each block |is_target_present_| is set to true if the
|
||||
// target signal is present and to false otherwise. This method can be called
|
||||
// to know if the data is target signal or interference and process it
|
||||
|
@ -71,12 +71,7 @@ int main(int argc, char* argv[]) {
|
||||
break;
|
||||
}
|
||||
|
||||
bf.ProcessChunk(captured_audio_cb.channels(),
|
||||
NULL,
|
||||
FLAGS_num_input_channels,
|
||||
kChunkSize,
|
||||
captured_audio_cb.channels(),
|
||||
NULL);
|
||||
bf.ProcessChunk(&captured_audio_cb, &captured_audio_cb);
|
||||
webrtc::PcmWriteFromFloat(
|
||||
write_file, kChunkSize, 1, captured_audio_cb.channels());
|
||||
}
|
||||
|
@ -24,12 +24,8 @@ class MockBeamformer : public Beamformer {
|
||||
~MockBeamformer() override;
|
||||
|
||||
MOCK_METHOD2(Initialize, void(int chunk_size_ms, int sample_rate_hz));
|
||||
MOCK_METHOD6(ProcessChunk, void(const float* const* input,
|
||||
const float* const* high_pass_split_input,
|
||||
int num_input_channels,
|
||||
int num_frames_per_band,
|
||||
float* const* output,
|
||||
float* const* high_pass_split_output));
|
||||
MOCK_METHOD2(ProcessChunk, void(const ChannelBuffer<float>* input,
|
||||
ChannelBuffer<float>* output));
|
||||
MOCK_METHOD0(is_target_present, bool());
|
||||
};
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user