Implement 3 band splitting filter bank by upsampling and splitting twice into 2 bands

Implemented the 3-band splitting filter bank by:
1. Upsample by 4/3 (from 48 kHz to 64 kHz).
2. Split twice into 2 bands.
3. Discard the uppermost band, because it is empty anyway.

A unittest was also implemented:
1. Generate a signal from presence or absence of sine waves of different frequencies.
2. Split into 3 bands and check their presence or absence.
3. Recombine the bands.
4. Calculate delay (as it is an IIR it depends on frequency).
5. Check that the cross correlation of input and output is high enough at that delay.

BUG=webrtc:3146
R=andrew@webrtc.org, bjornv@webrtc.org, kwiberg@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/31029004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@7754 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
aluebs@webrtc.org 2014-11-26 20:21:38 +00:00
parent 7806d8fe40
commit 79b9eba3ab
11 changed files with 466 additions and 196 deletions

View File

@ -20,7 +20,7 @@
// Maximum number of samples in a low/high-band frame.
enum
{
kMaxBandFrameLength = 240 // 10 ms at 48 kHz.
kMaxBandFrameLength = 320 // 10 ms at 64 kHz.
};
// QMF filter coefficients in Q16.

View File

@ -45,6 +45,8 @@ source_set("audio_processing") {
"audio_buffer.h",
"audio_processing_impl.cc",
"audio_processing_impl.h",
"channel_buffer.cc",
"channel_buffer.h",
"common.h",
"echo_cancellation_impl.cc",
"echo_cancellation_impl.h",

View File

@ -10,20 +10,13 @@
#include "webrtc/modules/audio_processing/audio_buffer.h"
#include "webrtc/common_audio/include/audio_util.h"
#include "webrtc/common_audio/resampler/push_sinc_resampler.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_processing/channel_buffer.h"
namespace webrtc {
namespace {
enum {
kSamplesPer8kHzChannel = 80,
kSamplesPer16kHzChannel = 160,
kSamplesPer32kHzChannel = 320,
kSamplesPer48kHzChannel = 480
};
bool HasKeyboardChannel(AudioProcessing::ChannelLayout layout) {
switch (layout) {
case AudioProcessing::kMono:
@ -61,64 +54,6 @@ void StereoToMono(const T* left, const T* right, T* out,
} // namespace
// One int16_t and one float ChannelBuffer that are kept in sync. The sync is
// broken when someone requests write access to either ChannelBuffer, and
// reestablished when someone requests the outdated ChannelBuffer. It is
// therefore safe to use the return value of ibuf_const() and fbuf_const()
// until the next call to ibuf() or fbuf(), and the return value of ibuf() and
// fbuf() until the next call to any of the other functions.
class IFChannelBuffer {
public:
IFChannelBuffer(int samples_per_channel, int num_channels)
: ivalid_(true),
ibuf_(samples_per_channel, num_channels),
fvalid_(true),
fbuf_(samples_per_channel, num_channels) {}
ChannelBuffer<int16_t>* ibuf() { return ibuf(false); }
ChannelBuffer<float>* fbuf() { return fbuf(false); }
const ChannelBuffer<int16_t>* ibuf_const() { return ibuf(true); }
const ChannelBuffer<float>* fbuf_const() { return fbuf(true); }
private:
ChannelBuffer<int16_t>* ibuf(bool readonly) {
RefreshI();
fvalid_ = readonly;
return &ibuf_;
}
ChannelBuffer<float>* fbuf(bool readonly) {
RefreshF();
ivalid_ = readonly;
return &fbuf_;
}
void RefreshF() {
if (!fvalid_) {
assert(ivalid_);
const int16_t* const int_data = ibuf_.data();
float* const float_data = fbuf_.data();
const int length = fbuf_.length();
for (int i = 0; i < length; ++i)
float_data[i] = int_data[i];
fvalid_ = true;
}
}
void RefreshI() {
if (!ivalid_) {
assert(fvalid_);
FloatS16ToS16(fbuf_.data(), ibuf_.length(), ibuf_.data());
ivalid_ = true;
}
}
bool ivalid_;
ChannelBuffer<int16_t> ibuf_;
bool fvalid_;
ChannelBuffer<float> fbuf_;
};
AudioBuffer::AudioBuffer(int input_samples_per_channel,
int num_input_channels,
int process_samples_per_channel,
@ -175,14 +110,14 @@ AudioBuffer::AudioBuffer(int input_samples_per_channel,
if (proc_samples_per_channel_ == kSamplesPer32kHzChannel ||
proc_samples_per_channel_ == kSamplesPer48kHzChannel) {
samples_per_split_channel_ = kSamplesPer16kHzChannel;
split_channels_low_.reset(new IFChannelBuffer(samples_per_split_channel_,
split_channels_.push_back(new IFChannelBuffer(samples_per_split_channel_,
num_proc_channels_));
split_channels_.push_back(new IFChannelBuffer(samples_per_split_channel_,
num_proc_channels_));
split_channels_high_.reset(new IFChannelBuffer(samples_per_split_channel_,
num_proc_channels_));
splitting_filter_.reset(new SplittingFilter(num_proc_channels_));
if (proc_samples_per_channel_ == kSamplesPer48kHzChannel) {
split_channels_super_high_.reset(
new IFChannelBuffer(samples_per_split_channel_, num_proc_channels_));
split_channels_.push_back(new IFChannelBuffer(samples_per_split_channel_,
num_proc_channels_));
}
}
}
@ -300,112 +235,112 @@ float* const* AudioBuffer::channels_f() {
}
const int16_t* AudioBuffer::low_pass_split_data(int channel) const {
return split_channels_low_.get()
? split_channels_low_->ibuf_const()->channel(channel)
return split_channels_.size() > 0
? split_channels_[0]->ibuf_const()->channel(channel)
: data(channel);
}
int16_t* AudioBuffer::low_pass_split_data(int channel) {
mixed_low_pass_valid_ = false;
return split_channels_low_.get()
? split_channels_low_->ibuf()->channel(channel)
return split_channels_.size() > 0
? split_channels_[0]->ibuf()->channel(channel)
: data(channel);
}
const int16_t* const* AudioBuffer::low_pass_split_channels() const {
return split_channels_low_.get()
? split_channels_low_->ibuf_const()->channels()
return split_channels_.size() > 0
? split_channels_[0]->ibuf_const()->channels()
: channels();
}
int16_t* const* AudioBuffer::low_pass_split_channels() {
mixed_low_pass_valid_ = false;
return split_channels_low_.get() ? split_channels_low_->ibuf()->channels()
return split_channels_.size() > 0 ? split_channels_[0]->ibuf()->channels()
: channels();
}
const float* AudioBuffer::low_pass_split_data_f(int channel) const {
return split_channels_low_.get()
? split_channels_low_->fbuf_const()->channel(channel)
return split_channels_.size() > 0
? split_channels_[0]->fbuf_const()->channel(channel)
: data_f(channel);
}
float* AudioBuffer::low_pass_split_data_f(int channel) {
mixed_low_pass_valid_ = false;
return split_channels_low_.get()
? split_channels_low_->fbuf()->channel(channel)
return split_channels_.size() > 0
? split_channels_[0]->fbuf()->channel(channel)
: data_f(channel);
}
const float* const* AudioBuffer::low_pass_split_channels_f() const {
return split_channels_low_.get()
? split_channels_low_->fbuf_const()->channels()
return split_channels_.size() > 0
? split_channels_[0]->fbuf_const()->channels()
: channels_f();
}
float* const* AudioBuffer::low_pass_split_channels_f() {
mixed_low_pass_valid_ = false;
return split_channels_low_.get()
? split_channels_low_->fbuf()->channels()
return split_channels_.size() > 0
? split_channels_[0]->fbuf()->channels()
: channels_f();
}
const int16_t* AudioBuffer::high_pass_split_data(int channel) const {
return split_channels_high_.get()
? split_channels_high_->ibuf_const()->channel(channel)
return split_channels_.size() > 1
? split_channels_[1]->ibuf_const()->channel(channel)
: NULL;
}
int16_t* AudioBuffer::high_pass_split_data(int channel) {
return split_channels_high_.get()
? split_channels_high_->ibuf()->channel(channel)
return split_channels_.size() > 1
? split_channels_[1]->ibuf()->channel(channel)
: NULL;
}
const int16_t* const* AudioBuffer::high_pass_split_channels() const {
return split_channels_high_.get()
? split_channels_high_->ibuf_const()->channels()
return split_channels_.size() > 1
? split_channels_[1]->ibuf_const()->channels()
: NULL;
}
int16_t* const* AudioBuffer::high_pass_split_channels() {
return split_channels_high_.get() ? split_channels_high_->ibuf()->channels()
return split_channels_.size() > 1 ? split_channels_[1]->ibuf()->channels()
: NULL;
}
const float* AudioBuffer::high_pass_split_data_f(int channel) const {
return split_channels_high_.get()
? split_channels_high_->fbuf_const()->channel(channel)
return split_channels_.size() > 1
? split_channels_[1]->fbuf_const()->channel(channel)
: NULL;
}
float* AudioBuffer::high_pass_split_data_f(int channel) {
return split_channels_high_.get()
? split_channels_high_->fbuf()->channel(channel)
return split_channels_.size() > 1
? split_channels_[1]->fbuf()->channel(channel)
: NULL;
}
const float* const* AudioBuffer::high_pass_split_channels_f() const {
return split_channels_high_.get()
? split_channels_high_->fbuf_const()->channels()
return split_channels_.size() > 1
? split_channels_[1]->fbuf_const()->channels()
: NULL;
}
float* const* AudioBuffer::high_pass_split_channels_f() {
return split_channels_high_.get()
? split_channels_high_->fbuf()->channels()
return split_channels_.size() > 1
? split_channels_[1]->fbuf()->channels()
: NULL;
}
const float* const* AudioBuffer::super_high_pass_split_channels_f() const {
return split_channels_super_high_.get()
? split_channels_super_high_->fbuf_const()->channels()
return split_channels_.size() > 2
? split_channels_[2]->fbuf_const()->channels()
: NULL;
}
float* const* AudioBuffer::super_high_pass_split_channels_f() {
return split_channels_super_high_.get()
? split_channels_super_high_->fbuf()->channels()
return split_channels_.size() > 2
? split_channels_[2]->fbuf()->channels()
: NULL;
}
@ -531,29 +466,13 @@ void AudioBuffer::CopyLowPassToReference() {
}
void AudioBuffer::SplitIntoFrequencyBands() {
if (samples_per_channel() == kSamplesPer32kHzChannel) {
splitting_filter_->TwoBandsAnalysis(
channels(), samples_per_channel(), num_proc_channels_,
low_pass_split_channels(), high_pass_split_channels());
} else if (samples_per_channel() == kSamplesPer48kHzChannel) {
splitting_filter_->ThreeBandsAnalysis(
channels_f(), samples_per_channel(), num_proc_channels_,
low_pass_split_channels_f(), high_pass_split_channels_f(),
super_high_pass_split_channels_f());
}
splitting_filter_->Analysis(channels_.get(),
split_channels_.get());
}
void AudioBuffer::MergeFrequencyBands() {
if (samples_per_channel() == kSamplesPer32kHzChannel) {
splitting_filter_->TwoBandsSynthesis(
low_pass_split_channels(), high_pass_split_channels(),
samples_per_split_channel(), num_proc_channels_, channels());
} else if (samples_per_channel() == kSamplesPer48kHzChannel) {
splitting_filter_->ThreeBandsSynthesis(
low_pass_split_channels_f(), high_pass_split_channels_f(),
super_high_pass_split_channels_f(), samples_per_split_channel(),
num_proc_channels_, channels_f());
}
splitting_filter_->Synthesis(split_channels_.get(),
channels_.get());
}
} // namespace webrtc

View File

@ -13,6 +13,7 @@
#include <vector>
#include "webrtc/common_audio/include/audio_util.h"
#include "webrtc/modules/audio_processing/common.h"
#include "webrtc/modules/audio_processing/include/audio_processing.h"
#include "webrtc/modules/audio_processing/splitting_filter.h"
@ -122,9 +123,7 @@ class AudioBuffer {
const float* keyboard_data_;
scoped_ptr<IFChannelBuffer> channels_;
scoped_ptr<IFChannelBuffer> split_channels_low_;
scoped_ptr<IFChannelBuffer> split_channels_high_;
scoped_ptr<IFChannelBuffer> split_channels_super_high_;
ScopedVector<IFChannelBuffer> split_channels_;
scoped_ptr<SplittingFilter> splitting_filter_;
scoped_ptr<ChannelBuffer<int16_t> > mixed_low_pass_channels_;
scoped_ptr<ChannelBuffer<int16_t> > low_pass_reference_channels_;

View File

@ -56,6 +56,8 @@
'audio_buffer.h',
'audio_processing_impl.cc',
'audio_processing_impl.h',
'channel_buffer.cc',
'channel_buffer.h',
'common.h',
'echo_cancellation_impl.cc',
'echo_cancellation_impl.h',

View File

@ -0,0 +1,63 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/channel_buffer.h"
namespace webrtc {
// Both mirrors start out zero-initialized and therefore in sync, so both
// validity flags begin as true.
IFChannelBuffer::IFChannelBuffer(int samples_per_channel, int num_channels)
    : ivalid_(true),
      ibuf_(samples_per_channel, num_channels),
      fvalid_(true),
      fbuf_(samples_per_channel, num_channels) {}
// Grants mutable access to the int16_t buffer. The float mirror is marked
// stale because the caller may modify the returned data.
ChannelBuffer<int16_t>* IFChannelBuffer::ibuf() {
  RefreshI();  // Pull in any pending float-side changes first.
  fvalid_ = false;
  return &ibuf_;
}
// Grants mutable access to the float buffer. The int mirror is marked
// stale because the caller may modify the returned data.
ChannelBuffer<float>* IFChannelBuffer::fbuf() {
  RefreshF();  // Pull in any pending int-side changes first.
  ivalid_ = false;
  return &fbuf_;
}
// Read-only access to the int16_t buffer; refreshes it if stale but leaves
// both mirrors valid.
const ChannelBuffer<int16_t>* IFChannelBuffer::ibuf_const() const {
  RefreshI();
  return &ibuf_;
}
// Read-only access to the float buffer; refreshes it if stale but leaves
// both mirrors valid.
const ChannelBuffer<float>* IFChannelBuffer::fbuf_const() const {
  RefreshF();
  return &fbuf_;
}
// Re-derives the float buffer from the int16_t buffer when the float side
// has gone stale. No-op when the float data is already current.
void IFChannelBuffer::RefreshF() const {
  if (fvalid_)
    return;
  assert(ivalid_);
  const int16_t* const src = ibuf_.data();
  float* const dst = fbuf_.data();
  const int num_samples = fbuf_.length();
  // int16_t -> float is a widening conversion and is exact for every value.
  for (int n = 0; n < num_samples; ++n)
    dst[n] = src[n];
  fvalid_ = true;
}
// Re-derives the int16_t buffer from the float buffer when the int side
// has gone stale. No-op when the int data is already current.
void IFChannelBuffer::RefreshI() const {
  if (ivalid_)
    return;
  assert(fvalid_);
  // FloatS16ToS16 (common_audio/audio_util) converts the S16-range float
  // samples back to int16_t.
  FloatS16ToS16(fbuf_.data(), ibuf_.length(), ibuf_.data());
  ivalid_ = true;
}
} // namespace webrtc

View File

@ -0,0 +1,49 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_CHANNEL_BUFFER_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_CHANNEL_BUFFER_H_
#include "webrtc/common_audio/include/audio_util.h"
#include "webrtc/modules/audio_processing/common.h"
namespace webrtc {
// One int16_t and one float ChannelBuffer that are kept in sync. The sync is
// broken when someone requests write access to either ChannelBuffer, and
// reestablished when someone requests the outdated ChannelBuffer. It is
// therefore safe to use the return value of ibuf_const() and fbuf_const()
// until the next call to ibuf() or fbuf(), and the return value of ibuf() and
// fbuf() until the next call to any of the other functions.
class IFChannelBuffer {
 public:
  IFChannelBuffer(int samples_per_channel, int num_channels);

  // Write access: returns the requested buffer and marks the other mirror
  // stale, since the caller may modify the returned data.
  ChannelBuffer<int16_t>* ibuf();
  ChannelBuffer<float>* fbuf();
  // Read-only access: refreshes the requested buffer if stale; both mirrors
  // remain valid afterwards.
  const ChannelBuffer<int16_t>* ibuf_const() const;
  const ChannelBuffer<float>* fbuf_const() const;

  // Both mirrors share the same geometry, so querying the int buffer is
  // sufficient.
  int num_channels() const { return ibuf_.num_channels(); }
  int samples_per_channel() const { return ibuf_.samples_per_channel(); }

 private:
  // Bring the stale mirror up to date from the valid one. Logically const:
  // they only synchronize internal representations.
  void RefreshF() const;
  void RefreshI() const;

  // ivalid_/fvalid_ track which mirror holds current data; at least one is
  // true at all times. mutable so const accessors can refresh lazily.
  mutable bool ivalid_;
  mutable ChannelBuffer<int16_t> ibuf_;
  mutable bool fvalid_;
  mutable ChannelBuffer<float> fbuf_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_CHANNEL_BUFFER_H_

View File

@ -10,56 +10,165 @@
#include "webrtc/modules/audio_processing/splitting_filter.h"
#include <assert.h>
#include "webrtc/base/checks.h"
#include "webrtc/common_audio/include/audio_util.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_processing/channel_buffer.h"
namespace webrtc {
SplittingFilter::SplittingFilter(int channels)
: channels_(channels), two_bands_states_(new TwoBandsStates[channels]) {
}
void SplittingFilter::TwoBandsAnalysis(const int16_t* const* in_data,
int in_data_length,
int channels,
int16_t* const* low_band,
int16_t* const* high_band) {
assert(channels_ == channels);
for (int i = 0; i < channels_; ++i) {
WebRtcSpl_AnalysisQMF(in_data[i], in_data_length, low_band[i], high_band[i],
two_bands_states_[i].analysis_filter_state1,
two_bands_states_[i].analysis_filter_state2);
: channels_(channels),
two_bands_states_(new TwoBandsStates[channels]),
band1_states_(new TwoBandsStates[channels]),
band2_states_(new TwoBandsStates[channels]) {
for (int i = 0; i < channels; ++i) {
analysis_resamplers_.push_back(new PushSincResampler(
kSamplesPer48kHzChannel, kSamplesPer64kHzChannel));
synthesis_resamplers_.push_back(new PushSincResampler(
kSamplesPer64kHzChannel, kSamplesPer48kHzChannel));
}
}
void SplittingFilter::TwoBandsSynthesis(const int16_t* const* low_band,
const int16_t* const* high_band,
int band_length,
int channels,
int16_t* const* out_data) {
assert(channels_ == channels);
for (int i = 0; i < channels_; ++i) {
WebRtcSpl_SynthesisQMF(low_band[i], high_band[i], band_length, out_data[i],
two_bands_states_[i].synthesis_filter_state1,
two_bands_states_[i].synthesis_filter_state2);
// Splits |in_data| into |bands.size()| frequency bands, dispatching to the
// two- or three-band implementation. Every buffer must have the same channel
// count, and each band holds 1/bands.size() of the input samples.
void SplittingFilter::Analysis(const IFChannelBuffer* in_data,
                               std::vector<IFChannelBuffer*>& bands) {
  const size_t num_bands = bands.size();
  DCHECK(num_bands == 2 || num_bands == 3);
  DCHECK_EQ(channels_, in_data->num_channels());
  for (size_t band = 0; band < num_bands; ++band) {
    DCHECK_EQ(channels_, bands[band]->num_channels());
    DCHECK_EQ(in_data->samples_per_channel(),
              static_cast<int>(num_bands) * bands[band]->samples_per_channel());
  }
  if (num_bands == 3) {
    ThreeBandsAnalysis(in_data, bands[0], bands[1], bands[2]);
  } else if (num_bands == 2) {
    TwoBandsAnalysis(in_data, bands[0], bands[1]);
  }
}
void SplittingFilter::ThreeBandsAnalysis(const float* const* in_data,
int in_data_length,
int channels,
float* const* low_band,
float* const* high_band,
float* const* super_high_band) {
// Merges the given frequency bands back into |out_data|; the inverse of
// Analysis(). Buffer size requirements mirror those of Analysis().
void SplittingFilter::Synthesis(const std::vector<IFChannelBuffer*>& bands,
                                IFChannelBuffer* out_data) {
  const size_t num_bands = bands.size();
  DCHECK(num_bands == 2 || num_bands == 3);
  DCHECK_EQ(channels_, out_data->num_channels());
  for (size_t band = 0; band < num_bands; ++band) {
    DCHECK_EQ(channels_, bands[band]->num_channels());
    DCHECK_EQ(out_data->samples_per_channel(),
              static_cast<int>(num_bands) * bands[band]->samples_per_channel());
  }
  if (num_bands == 3) {
    ThreeBandsSynthesis(bands[0], bands[1], bands[2], out_data);
  } else if (num_bands == 2) {
    TwoBandsSynthesis(bands[0], bands[1], out_data);
  }
}
void SplittingFilter::ThreeBandsSynthesis(const float* const* low_band,
const float* const* high_band,
const float* const* super_high_band,
int band_length,
int channels,
float* const* out_data) {
void SplittingFilter::TwoBandsAnalysis(const IFChannelBuffer* in_data,
IFChannelBuffer* band1,
IFChannelBuffer* band2) {
for (int i = 0; i < channels_; ++i) {
WebRtcSpl_AnalysisQMF(in_data->ibuf_const()->channel(i),
in_data->samples_per_channel(),
band1->ibuf()->channel(i),
band2->ibuf()->channel(i),
two_bands_states_[i].analysis_state1,
two_bands_states_[i].analysis_state2);
}
}
void SplittingFilter::TwoBandsSynthesis(const IFChannelBuffer* band1,
const IFChannelBuffer* band2,
IFChannelBuffer* out_data) {
for (int i = 0; i < channels_; ++i) {
WebRtcSpl_SynthesisQMF(band1->ibuf_const()->channel(i),
band2->ibuf_const()->channel(i),
band1->samples_per_channel(),
out_data->ibuf()->channel(i),
two_bands_states_[i].synthesis_state1,
two_bands_states_[i].synthesis_state2);
}
}
// This is a simple implementation using the existing code and will be replaced
// by a proper 3 band filter bank.
// It up-samples from 48kHz to 64kHz, splits twice into 2 bands and discards the
// uppermost band, because it is empty anyway.
void SplittingFilter::ThreeBandsAnalysis(const IFChannelBuffer* in_data,
                                         IFChannelBuffer* band1,
                                         IFChannelBuffer* band2,
                                         IFChannelBuffer* band3) {
  // Only 10 ms at 48 kHz (480 samples per channel) is supported for now.
  DCHECK_EQ(kSamplesPer48kHzChannel,
            in_data->samples_per_channel());
  InitBuffers();
  for (int i = 0; i < channels_; ++i) {
    // Upsample 48 kHz -> 64 kHz so the two QMF splits yield bands that line
    // up with the 16 kHz band rate used by the rest of the pipeline.
    analysis_resamplers_[i]->Resample(in_data->ibuf_const()->channel(i),
                                      kSamplesPer48kHzChannel,
                                      int_buffer_.get(),
                                      kSamplesPer64kHzChannel);
    // First split: the 64 kHz signal -> two 32 kHz halves stored
    // back-to-back in int_buffer_. NOTE(review): the outputs alias the input
    // buffer — this assumes WebRtcSpl_AnalysisQMF never reads an input
    // sample after writing the same index; confirm against its
    // implementation.
    WebRtcSpl_AnalysisQMF(int_buffer_.get(),
                          kSamplesPer64kHzChannel,
                          int_buffer_.get(),
                          int_buffer_.get() + kSamplesPer32kHzChannel,
                          two_bands_states_[i].analysis_state1,
                          two_bands_states_[i].analysis_state2);
    // Second split of the lower half -> band1 and band2.
    WebRtcSpl_AnalysisQMF(int_buffer_.get(),
                          kSamplesPer32kHzChannel,
                          band1->ibuf()->channel(i),
                          band2->ibuf()->channel(i),
                          band1_states_[i].analysis_state1,
                          band1_states_[i].analysis_state2);
    // Second split of the upper half: band3 takes the high QMF output
    // (presumably because the upper branch is spectrally inverted after
    // decimation — confirm); the other output is the empty uppermost band,
    // written into int_buffer_ and discarded.
    WebRtcSpl_AnalysisQMF(int_buffer_.get() + kSamplesPer32kHzChannel,
                          kSamplesPer32kHzChannel,
                          int_buffer_.get(),
                          band3->ibuf()->channel(i),
                          band2_states_[i].analysis_state1,
                          band2_states_[i].analysis_state2);
  }
}
// This is a simple implementation using the existing code and will be replaced
// by a proper 3 band filter bank.
// Using an empty uppermost band, it merges the 4 bands in 2 steps and
// down-samples from 64kHz to 48kHz.
void SplittingFilter::ThreeBandsSynthesis(const IFChannelBuffer* band1,
                                          const IFChannelBuffer* band2,
                                          const IFChannelBuffer* band3,
                                          IFChannelBuffer* out_data) {
  // Only 10 ms at 48 kHz (480 samples per channel) is supported for now.
  DCHECK_EQ(kSamplesPer48kHzChannel,
            out_data->samples_per_channel());
  InitBuffers();
  for (int i = 0; i < channels_; ++i) {
    // Zero the scratch buffer: the discarded uppermost band must enter the
    // merge as silence.
    memset(int_buffer_.get(),
           0,
           kSamplesPer64kHzChannel * sizeof(int_buffer_[0]));
    // Merge band1 and band2 into the lower (32 kHz) half of int_buffer_.
    WebRtcSpl_SynthesisQMF(band1->ibuf_const()->channel(i),
                           band2->ibuf_const()->channel(i),
                           kSamplesPer16kHzChannel,
                           int_buffer_.get(),
                           band1_states_[i].synthesis_state1,
                           band1_states_[i].synthesis_state2);
    // Merge band3 with the zeroed uppermost band into the upper half,
    // mirroring the analysis where band3 was the high QMF output.
    // NOTE(review): the low input aliases the output region — assumes
    // WebRtcSpl_SynthesisQMF tolerates in-place operation; confirm.
    WebRtcSpl_SynthesisQMF(int_buffer_.get() + kSamplesPer32kHzChannel,
                           band3->ibuf_const()->channel(i),
                           kSamplesPer16kHzChannel,
                           int_buffer_.get() + kSamplesPer32kHzChannel,
                           band2_states_[i].synthesis_state1,
                           band2_states_[i].synthesis_state2);
    // Merge both 32 kHz halves back into one 64 kHz signal, again in place.
    WebRtcSpl_SynthesisQMF(int_buffer_.get(),
                           int_buffer_.get() + kSamplesPer32kHzChannel,
                           kSamplesPer32kHzChannel,
                           int_buffer_.get(),
                           two_bands_states_[i].synthesis_state1,
                           two_bands_states_[i].synthesis_state2);
    // Downsample 64 kHz -> 48 kHz to undo the analysis-side resampling.
    synthesis_resamplers_[i]->Resample(int_buffer_.get(),
                                       kSamplesPer64kHzChannel,
                                       out_data->ibuf()->channel(i),
                                       kSamplesPer48kHzChannel);
  }
}
// Lazily allocates the scratch buffer used by the three-band code paths;
// subsequent calls are no-ops.
void SplittingFilter::InitBuffers() {
  if (int_buffer_)
    return;
  int_buffer_.reset(new int16_t[kSamplesPer64kHzChannel]);
}
} // namespace webrtc

View File

@ -13,56 +13,80 @@
#include <string.h>
#include "webrtc/common_audio/resampler/push_sinc_resampler.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
#include "webrtc/system_wrappers/interface/scoped_vector.h"
#include "webrtc/typedefs.h"
namespace webrtc {
class IFChannelBuffer;
enum {
kSamplesPer8kHzChannel = 80,
kSamplesPer16kHzChannel = 160,
kSamplesPer32kHzChannel = 320,
kSamplesPer48kHzChannel = 480,
kSamplesPer64kHzChannel = 640
};
struct TwoBandsStates {
TwoBandsStates() {
memset(analysis_filter_state1, 0, sizeof(analysis_filter_state1));
memset(analysis_filter_state2, 0, sizeof(analysis_filter_state2));
memset(synthesis_filter_state1, 0, sizeof(synthesis_filter_state1));
memset(synthesis_filter_state2, 0, sizeof(synthesis_filter_state2));
memset(analysis_state1, 0, sizeof(analysis_state1));
memset(analysis_state2, 0, sizeof(analysis_state2));
memset(synthesis_state1, 0, sizeof(synthesis_state1));
memset(synthesis_state2, 0, sizeof(synthesis_state2));
}
static const int kStateSize = 6;
int analysis_filter_state1[kStateSize];
int analysis_filter_state2[kStateSize];
int synthesis_filter_state1[kStateSize];
int synthesis_filter_state2[kStateSize];
int analysis_state1[kStateSize];
int analysis_state2[kStateSize];
int synthesis_state1[kStateSize];
int synthesis_state2[kStateSize];
};
// Splitting filter which is able to split into and merge from 2 or 3 frequency
// bands. The number of channels needs to be provided at construction time.
//
// For each block, Analysis() is called to split into bands and then Synthesis()
// to merge these bands again. The input and output signals are contained in
// IFChannelBuffers and for the different bands an array of IFChannelBuffers is
// used.
class SplittingFilter {
public:
SplittingFilter(int channels);
void TwoBandsAnalysis(const int16_t* const* in_data,
int in_data_length,
int channels,
int16_t* const* low_band,
int16_t* const* high_band);
void TwoBandsSynthesis(const int16_t* const* low_band,
const int16_t* const* high_band,
int band_length,
int channels,
int16_t* const* out_data);
void ThreeBandsAnalysis(const float* const* in_data,
int in_data_length,
int channels,
float* const* low_band,
float* const* high_band,
float* const* super_high_band);
void ThreeBandsSynthesis(const float* const* low_band,
const float* const* high_band,
const float* const* super_high_band,
int band_length,
int channels,
float* const* out_data);
void Analysis(const IFChannelBuffer* in_data,
std::vector<IFChannelBuffer*>& bands);
void Synthesis(const std::vector<IFChannelBuffer*>& bands,
IFChannelBuffer* out_data);
private:
// These work for 640 samples or less.
void TwoBandsAnalysis(const IFChannelBuffer* in_data,
IFChannelBuffer* band1,
IFChannelBuffer* band2);
void TwoBandsSynthesis(const IFChannelBuffer* band1,
const IFChannelBuffer* band2,
IFChannelBuffer* out_data);
// These only work for 480 samples at the moment.
void ThreeBandsAnalysis(const IFChannelBuffer* in_data,
IFChannelBuffer* band1,
IFChannelBuffer* band2,
IFChannelBuffer* band3);
void ThreeBandsSynthesis(const IFChannelBuffer* band1,
const IFChannelBuffer* band2,
const IFChannelBuffer* band3,
IFChannelBuffer* out_data);
void InitBuffers();
int channels_;
scoped_ptr<TwoBandsStates[]> two_bands_states_;
scoped_ptr<TwoBandsStates[]> band1_states_;
scoped_ptr<TwoBandsStates[]> band2_states_;
ScopedVector<PushSincResampler> analysis_resamplers_;
ScopedVector<PushSincResampler> synthesis_resamplers_;
scoped_ptr<int16_t[]> int_buffer_;
};
} // namespace webrtc

View File

@ -0,0 +1,102 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// MSVC++ requires this to be set before any other includes to get M_PI.
#define _USE_MATH_DEFINES
#include <math.h>
#include "testing/gtest/include/gtest/gtest.h"
#include "webrtc/modules/audio_processing/channel_buffer.h"
#include "webrtc/modules/audio_processing/splitting_filter.h"
#include "webrtc/common_audio/include/audio_util.h"
namespace webrtc {
// Generates a signal from presence or absence of sine waves of different
// frequencies.
// Splits into 3 bands and checks their presence or absence.
// Recombines the bands.
// Calculates the delay.
// Checks that the cross correlation of input and output is high enough at the
// calculated delay.
TEST(SplittingFilterTest, SplitsIntoThreeBandsAndReconstructs) {
  static const int kChannels = 1;
  static const int kSampleRateHz = 48000;
  static const int kNumBands = 3;
  // One probe tone per band: 1 kHz, 12 kHz and 18 kHz are expected to land
  // in bands 0, 1 and 2 respectively.
  static const int kFrequenciesHz[kNumBands] = {1000, 12000, 18000};
  static const float kAmplitude = 8192;
  static const int kChunks = 8;
  SplittingFilter splitting_filter(kChannels);
  IFChannelBuffer in_data(kSamplesPer48kHzChannel, kChannels);
  IFChannelBuffer out_data(kSamplesPer48kHzChannel, kChannels);
  ScopedVector<IFChannelBuffer> bands;
  for (int i = 0; i < kNumBands; ++i) {
    bands.push_back(new IFChannelBuffer(kSamplesPer16kHzChannel, kChannels));
  }
  for (int i = 0; i < kChunks; ++i) {
    // Input signal generation: the 3 bits of the chunk index select which
    // sines are present, so all 8 present/absent combinations are covered.
    bool is_present[kNumBands];
    memset(in_data.fbuf()->channel(0),
           0,
           kSamplesPer48kHzChannel * sizeof(in_data.fbuf()->channel(0)[0]));
    for (int j = 0; j < kNumBands; ++j) {
      is_present[j] = i & (1 << j);
      float amplitude = is_present[j] ? kAmplitude : 0;
      for (int k = 0; k < kSamplesPer48kHzChannel; ++k) {
        // Phase is continuous across chunks (i * chunk_length + k).
        in_data.fbuf()->channel(0)[k] +=
            amplitude * sin(2 * M_PI * kFrequenciesHz[j] *
                (i * kSamplesPer48kHzChannel + k) / kSampleRateHz);
      }
    }
    // Three band splitting filter.
    splitting_filter.Analysis(&in_data, bands.get());
    // Energy calculation.
    float energy[kNumBands];
    for (int j = 0; j < kNumBands; ++j) {
      energy[j] = 0;
      for (int k = 0; k < kSamplesPer16kHzChannel; ++k) {
        energy[j] += bands[j]->fbuf_const()->channel(0)[k] *
                     bands[j]->fbuf_const()->channel(0)[k];
      }
      energy[j] /= kSamplesPer16kHzChannel;
      // A pure sine of amplitude A has mean energy A^2 / 2; half of that is
      // used as the present/absent decision threshold.
      if (is_present[j]) {
        EXPECT_GT(energy[j], kAmplitude * kAmplitude / 4);
      } else {
        EXPECT_LT(energy[j], kAmplitude * kAmplitude / 4);
      }
    }
    // Three band merge.
    splitting_filter.Synthesis(bands.get(), &out_data);
    // Delay and cross correlation estimation: the filter bank is IIR, so the
    // delay depends on frequency; search for the lag that maximizes the
    // cross correlation instead of assuming a fixed one.
    float xcorr = 0;
    for (int delay = 0; delay < kSamplesPer48kHzChannel; ++delay) {
      float tmpcorr = 0;
      for (int j = delay; j < kSamplesPer48kHzChannel; ++j) {
        tmpcorr += in_data.fbuf_const()->channel(0)[j] *
                   out_data.fbuf_const()->channel(0)[j - delay];
      }
      tmpcorr /= kSamplesPer48kHzChannel;
      if (tmpcorr > xcorr) {
        xcorr = tmpcorr;
      }
    }
    // High cross correlation check. Only enforced when at least one tone is
    // present: for the all-silent chunk the correlation is trivially ~0.
    bool any_present = false;
    for (int j = 0; j < kNumBands; ++j) {
      any_present |= is_present[j];
    }
    if (any_present) {
      EXPECT_GT(xcorr, kAmplitude * kAmplitude / 4);
    }
  }
}
} // namespace webrtc

View File

@ -162,6 +162,7 @@
'audio_processing/aec/system_delay_unittest.cc',
'audio_processing/aec/echo_cancellation_unittest.cc',
'audio_processing/echo_cancellation_impl_unittest.cc',
'audio_processing/splitting_filter_unittest.cc',
'audio_processing/utility/delay_estimator_unittest.cc',
'audio_processing/utility/ring_buffer_unittest.cc',
'bitrate_controller/bitrate_controller_unittest.cc',