Support 48kHz in Noise Suppression
Doing the same for the 16-24kHz band as was done for the 8-16kHz band. Results look and sound just as nice.

BUG=webrtc:3146
R=andrew@webrtc.org, bjornv@webrtc.org, kwiberg@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/29139004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@7865 4adac7df-926f-26a2-2b94-8c16560cd09d
parent: d8ca723de7
commit: c5ebbd98f5
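At a glance, the change generalizes the noise suppressor's two fixed bands (L and H) to an array of band pointers plus a band count, which is what lets a third 16-24kHz band through at 48kHz. A minimal sketch of the new call shape from a hypothetical caller (the wrapper name is illustrative and the include path is assumed from this era of the tree; the WebRtcNs_Process signature itself is taken from the diff below):

    // Hypothetical caller; assumes the WebRTC NS header is on the include path.
    #include "webrtc/modules/audio_processing/ns/include/noise_suppression.h"

    // Before: WebRtcNs_Process(ns, low_in, high_in, low_out, high_out);
    // After: one pointer array per direction plus a band count, so the same
    // call covers 1 band (8/16 kHz), 2 bands (32 kHz) and 3 bands (48 kHz).
    void SuppressFrame(NsHandle* ns,
                       const float* const* in_bands,
                       float* const* out_bands,
                       int num_bands) {
      WebRtcNs_Process(ns, in_bands, num_bands, out_bands);
    }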
@@ -65,6 +65,7 @@ AudioBuffer::AudioBuffer(int input_samples_per_channel,
       proc_samples_per_channel_(process_samples_per_channel),
       num_proc_channels_(num_process_channels),
       output_samples_per_channel_(output_samples_per_channel),
+      num_bands_(1),
       samples_per_split_channel_(proc_samples_per_channel_),
       mixed_low_pass_valid_(false),
       reference_copied_(false),
@@ -111,6 +112,7 @@ AudioBuffer::AudioBuffer(int input_samples_per_channel,
   if (proc_samples_per_channel_ == kSamplesPer32kHzChannel ||
       proc_samples_per_channel_ == kSamplesPer48kHzChannel) {
     samples_per_split_channel_ = kSamplesPer16kHzChannel;
+    num_bands_ = proc_samples_per_channel_ / samples_per_split_channel_;
     split_channels_.push_back(new IFChannelBuffer(samples_per_split_channel_,
                                                   num_proc_channels_));
     split_channels_.push_back(new IFChannelBuffer(samples_per_split_channel_,
@@ -121,6 +123,8 @@ AudioBuffer::AudioBuffer(int input_samples_per_channel,
                                                   num_proc_channels_));
     }
   }
+  bands_.reset(new int16_t*[num_proc_channels_ * kMaxNumBands]);
+  bands_f_.reset(new float*[num_proc_channels_ * kMaxNumBands]);
 }

 AudioBuffer::~AudioBuffer() {}
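The bands_ and bands_f_ arrays allocated above are flat pointer tables: kMaxNumBands slots per channel, so the new split_bands(channel) accessor can hand back &bands_[kMaxNumBands * channel] as a per-channel band array. A self-contained sketch of that layout (simplified types; not the real IFChannelBuffer):

    #include <stdint.h>
    #include <stdio.h>
    #include <vector>

    static const int kMaxNumBands = 3;

    int main() {
      const int num_channels = 2;
      const int num_bands = 3;           // e.g. 48 kHz input
      const int samples_per_band = 160;  // 10 ms per band at the 16 kHz split rate
      std::vector<int16_t> storage(num_channels * num_bands * samples_per_band);
      // One slot per (channel, band); NULL marks bands absent at lower rates.
      std::vector<int16_t*> bands(num_channels * kMaxNumBands, (int16_t*)0);
      for (int ch = 0; ch < num_channels; ++ch) {
        for (int b = 0; b < num_bands; ++b) {
          bands[kMaxNumBands * ch + b] =
              &storage[(ch * num_bands + b) * samples_per_band];
        }
      }
      // split_bands(ch) corresponds to &bands[kMaxNumBands * ch]:
      int16_t* const* ch1_bands = &bands[kMaxNumBands * 1];
      printf("channel 1, band 2 present: %d\n", ch1_bands[2] != 0);
      return 0;
    }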
@@ -216,14 +220,28 @@ int16_t* const* AudioBuffer::channels() {
   return channels_->ibuf()->channels();
 }

-const int16_t* AudioBuffer::split_data_const(int channel, Band band) const {
-  const int16_t* const* chs = split_channels_const(band);
-  return chs ? chs[channel] : NULL;
+const int16_t* const* AudioBuffer::split_bands_const(int channel) const {
+  // This is necessary to make sure that the int16_t data is up to date in the
+  // IFChannelBuffer.
+  // TODO(aluebs): Having to depend on this to get the updated data is bug
+  // prone. One solution is to have ChannelBuffer track the bands as well.
+  for (int i = 0; i < kMaxNumBands; ++i) {
+    int16_t* const* channels =
+        const_cast<int16_t* const*>(split_channels_const(static_cast<Band>(i)));
+    bands_[kMaxNumBands * channel + i] = channels ? channels[channel] : NULL;
+  }
+  return &bands_[kMaxNumBands * channel];
 }

-int16_t* AudioBuffer::split_data(int channel, Band band) {
-  int16_t* const* chs = split_channels(band);
-  return chs ? chs[channel] : NULL;
+int16_t* const* AudioBuffer::split_bands(int channel) {
+  mixed_low_pass_valid_ = false;
+  // This is necessary to make sure that the int16_t data is up to date and the
+  // float data is marked as invalid in the IFChannelBuffer.
+  for (int i = 0; i < kMaxNumBands; ++i) {
+    int16_t* const* channels = split_channels(static_cast<Band>(i));
+    bands_[kMaxNumBands * channel + i] = channels ? channels[channel] : NULL;
+  }
+  return &bands_[kMaxNumBands * channel];
 }

 const int16_t* const* AudioBuffer::split_channels_const(Band band) const {
@@ -260,14 +278,28 @@ float* const* AudioBuffer::channels_f() {
   return channels_->fbuf()->channels();
 }

-const float* AudioBuffer::split_data_const_f(int channel, Band band) const {
-  const float* const* chs = split_channels_const_f(band);
-  return chs ? chs[channel] : NULL;
+const float* const* AudioBuffer::split_bands_const_f(int channel) const {
+  // This is necessary to make sure that the float data is up to date in the
+  // IFChannelBuffer.
+  for (int i = 0; i < kMaxNumBands; ++i) {
+    float* const* channels =
+        const_cast<float* const*>(split_channels_const_f(static_cast<Band>(i)));
+    bands_f_[kMaxNumBands * channel + i] = channels ? channels[channel] : NULL;
+  }
+  return &bands_f_[kMaxNumBands * channel];
 }

-float* AudioBuffer::split_data_f(int channel, Band band) {
-  float* const* chs = split_channels_f(band);
-  return chs ? chs[channel] : NULL;
+float* const* AudioBuffer::split_bands_f(int channel) {
+  mixed_low_pass_valid_ = false;
+  // This is necessary to make sure that the float data is up to date and the
+  // int16_t data is marked as invalid in the IFChannelBuffer.
+  for (int i = 0; i < kMaxNumBands; ++i) {
+    float* const* channels = split_channels_f(static_cast<Band>(i));
+    bands_f_[kMaxNumBands * channel + i] = channels ? channels[channel] : NULL;
+  }
+  return &bands_f_[kMaxNumBands * channel];
 }

 const float* const* AudioBuffer::split_channels_const_f(Band band) const {
@@ -292,7 +324,7 @@ const int16_t* AudioBuffer::mixed_low_pass_data() {
   assert(num_proc_channels_ == 1 || num_proc_channels_ == 2);

   if (num_proc_channels_ == 1) {
-    return split_data_const(0, kBand0To8kHz);
+    return split_bands_const(0)[kBand0To8kHz];
   }

   if (!mixed_low_pass_valid_) {
@@ -300,8 +332,8 @@ const int16_t* AudioBuffer::mixed_low_pass_data() {
       mixed_low_pass_channels_.reset(
           new ChannelBuffer<int16_t>(samples_per_split_channel_, 1));
     }
-    StereoToMono(split_data_const(0, kBand0To8kHz),
-                 split_data_const(1, kBand0To8kHz),
+    StereoToMono(split_bands_const(0)[kBand0To8kHz],
+                 split_bands_const(1)[kBand0To8kHz],
                  mixed_low_pass_channels_->data(),
                  samples_per_split_channel_);
     mixed_low_pass_valid_ = true;
@@ -346,6 +378,10 @@ int AudioBuffer::samples_per_keyboard_channel() const {
   return input_samples_per_channel_;
 }

+int AudioBuffer::num_bands() const {
+  return num_bands_;
+}
+
 // TODO(andrew): Do deinterleaving and mixing in one step?
 void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
   assert(proc_samples_per_channel_ == input_samples_per_channel_);
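The band count follows directly from frame sizes: a 10 ms frame is 80/160/320/480 samples at 8/16/32/48 kHz, and the splitting filter produces 160-sample bands at 32 and 48 kHz, giving num_bands_ = 2 and 3 respectively (1 otherwise). A quick self-contained check of that arithmetic:

    #include <stdio.h>

    int main() {
      const int rates[] = {8000, 16000, 32000, 48000};
      for (int i = 0; i < 4; ++i) {
        int samples_per_channel = rates[i] / 100;  // 10 ms frame
        int num_bands = 1;
        if (samples_per_channel == 320 || samples_per_channel == 480)
          num_bands = samples_per_channel / 160;  // kSamplesPer16kHzChannel
        printf("%d Hz: %d samples, %d band(s)\n",
               rates[i], samples_per_channel, num_bands);
      }
      return 0;
    }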
@@ -404,7 +440,7 @@ void AudioBuffer::CopyLowPassToReference() {
                                                      num_proc_channels_));
   }
   for (int i = 0; i < num_proc_channels_; i++) {
-    low_pass_reference_channels_->CopyFrom(split_data_const(i, kBand0To8kHz),
+    low_pass_reference_channels_->CopyFrom(split_bands_const(i)[kBand0To8kHz],
                                            i);
   }
 }
@@ -27,6 +27,7 @@ namespace webrtc {
 class PushSincResampler;
 class IFChannelBuffer;

+static const int kMaxNumBands = 3;
 enum Band {
   kBand0To8kHz = 0,
   kBand8To16kHz = 1,
@@ -47,6 +48,7 @@ class AudioBuffer {
   int samples_per_channel() const;
   int samples_per_split_channel() const;
   int samples_per_keyboard_channel() const;
+  int num_bands() const;

   // Sample array accessors. Channels are guaranteed to be stored contiguously
   // in memory. Prefer to use the const variants of each accessor when
@@ -55,8 +57,8 @@ class AudioBuffer {
   const int16_t* data_const(int channel) const;
   int16_t* const* channels();
   const int16_t* const* channels_const() const;
-  int16_t* split_data(int channel, Band band);
-  const int16_t* split_data_const(int channel, Band band) const;
+  int16_t* const* split_bands(int channel);
+  const int16_t* const* split_bands_const(int channel) const;
   int16_t* const* split_channels(Band band);
   const int16_t* const* split_channels_const(Band band) const;

@@ -71,8 +73,8 @@ class AudioBuffer {
   const float* data_const_f(int channel) const;
   float* const* channels_f();
   const float* const* channels_const_f() const;
-  float* split_data_f(int channel, Band band);
-  const float* split_data_const_f(int channel, Band band) const;
+  float* const* split_bands_f(int channel);
+  const float* const* split_bands_const_f(int channel) const;
   float* const* split_channels_f(Band band);
   const float* const* split_channels_const_f(Band band) const;

@@ -110,6 +112,7 @@ class AudioBuffer {
   const int proc_samples_per_channel_;
   const int num_proc_channels_;
   const int output_samples_per_channel_;
+  int num_bands_;
   int samples_per_split_channel_;
   bool mixed_low_pass_valid_;
   bool reference_copied_;
@@ -118,6 +121,8 @@ class AudioBuffer {
   const float* keyboard_data_;
   scoped_ptr<IFChannelBuffer> channels_;
   ScopedVector<IFChannelBuffer> split_channels_;
+  scoped_ptr<int16_t*[]> bands_;
+  scoped_ptr<float*[]> bands_f_;
   scoped_ptr<SplittingFilter> splitting_filter_;
   scoped_ptr<ChannelBuffer<int16_t> > mixed_low_pass_channels_;
   scoped_ptr<ChannelBuffer<int16_t> > low_pass_reference_channels_;
@@ -89,7 +89,7 @@ int EchoCancellationImpl::ProcessRenderAudio(const AudioBuffer* audio) {
       Handle* my_handle = static_cast<Handle*>(handle(handle_index));
       err = WebRtcAec_BufferFarend(
           my_handle,
-          audio->split_data_const_f(j, kBand0To8kHz),
+          audio->split_bands_const_f(j)[kBand0To8kHz],
           static_cast<int16_t>(audio->samples_per_split_channel()));

       if (err != apm_->kNoError) {
@@ -129,10 +129,10 @@ int EchoCancellationImpl::ProcessCaptureAudio(AudioBuffer* audio) {
       Handle* my_handle = handle(handle_index);
       err = WebRtcAec_Process(
           my_handle,
-          audio->split_data_const_f(i, kBand0To8kHz),
-          audio->split_data_const_f(i, kBand8To16kHz),
-          audio->split_data_f(i, kBand0To8kHz),
-          audio->split_data_f(i, kBand8To16kHz),
+          audio->split_bands_const_f(i)[kBand0To8kHz],
+          audio->split_bands_const_f(i)[kBand8To16kHz],
+          audio->split_bands_f(i)[kBand0To8kHz],
+          audio->split_bands_f(i)[kBand8To16kHz],
           static_cast<int16_t>(audio->samples_per_split_channel()),
           apm_->stream_delay_ms(),
           stream_drift_samples_);
@@ -95,7 +95,7 @@ int EchoControlMobileImpl::ProcessRenderAudio(const AudioBuffer* audio) {
       Handle* my_handle = static_cast<Handle*>(handle(handle_index));
       err = WebRtcAecm_BufferFarend(
           my_handle,
-          audio->split_data_const(j, kBand0To8kHz),
+          audio->split_bands_const(j)[kBand0To8kHz],
           static_cast<int16_t>(audio->samples_per_split_channel()));

       if (err != apm_->kNoError) {
@@ -129,7 +129,7 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio) {
      // TODO(ajm): improve how this works, possibly inside AECM.
      // This is kind of hacked up.
      const int16_t* noisy = audio->low_pass_reference(i);
-     const int16_t* clean = audio->split_data_const(i, kBand0To8kHz);
+     const int16_t* clean = audio->split_bands_const(i)[kBand0To8kHz];
      if (noisy == NULL) {
        noisy = clean;
        clean = NULL;
@@ -140,7 +140,7 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio) {
          my_handle,
          noisy,
          clean,
-         audio->split_data(i, kBand0To8kHz),
+         audio->split_bands(i)[kBand0To8kHz],
          static_cast<int16_t>(audio->samples_per_split_channel()),
          apm_->stream_delay_ms());

@@ -90,8 +90,8 @@ int GainControlImpl::AnalyzeCaptureAudio(AudioBuffer* audio) {
       Handle* my_handle = static_cast<Handle*>(handle(i));
       err = WebRtcAgc_AddMic(
           my_handle,
-          audio->split_data(i, kBand0To8kHz),
-          audio->split_data(i, kBand8To16kHz),
+          audio->split_bands(i)[kBand0To8kHz],
+          audio->split_bands(i)[kBand8To16kHz],
           static_cast<int16_t>(audio->samples_per_split_channel()));

       if (err != apm_->kNoError) {
@@ -106,8 +106,8 @@ int GainControlImpl::AnalyzeCaptureAudio(AudioBuffer* audio) {

       err = WebRtcAgc_VirtualMic(
           my_handle,
-          audio->split_data(i, kBand0To8kHz),
-          audio->split_data(i, kBand8To16kHz),
+          audio->split_bands(i)[kBand0To8kHz],
+          audio->split_bands(i)[kBand8To16kHz],
           static_cast<int16_t>(audio->samples_per_split_channel()),
           analog_capture_level_,
           &capture_level_out);
@@ -144,11 +144,11 @@ int GainControlImpl::ProcessCaptureAudio(AudioBuffer* audio) {

     int err = WebRtcAgc_Process(
         my_handle,
-        audio->split_data_const(i, kBand0To8kHz),
-        audio->split_data_const(i, kBand8To16kHz),
+        audio->split_bands_const(i)[kBand0To8kHz],
+        audio->split_bands_const(i)[kBand8To16kHz],
         static_cast<int16_t>(audio->samples_per_split_channel()),
-        audio->split_data(i, kBand0To8kHz),
-        audio->split_data(i, kBand8To16kHz),
+        audio->split_bands(i)[kBand0To8kHz],
+        audio->split_bands(i)[kBand8To16kHz],
         capture_levels_[i],
         &capture_level_out,
         apm_->echo_cancellation()->stream_has_echo(),
@@ -123,7 +123,7 @@ int HighPassFilterImpl::ProcessCaptureAudio(AudioBuffer* audio) {
   for (int i = 0; i < num_handles(); i++) {
     Handle* my_handle = static_cast<Handle*>(handle(i));
     err = Filter(my_handle,
-                 audio->split_data(i, kBand0To8kHz),
+                 audio->split_bands(i)[kBand0To8kHz],
                  audio->samples_per_split_channel());

     if (err != apm_->kNoError) {
@@ -66,19 +66,13 @@ int NoiseSuppressionImpl::AnalyzeCaptureAudio(AudioBuffer* audio) {
   for (int i = 0; i < num_handles(); ++i) {
     Handle* my_handle = static_cast<Handle*>(handle(i));

-    int err = WebRtcNs_Analyze(my_handle,
-                               audio->split_data_f(i, kBand0To8kHz));
-    if (err != apm_->kNoError) {
-      return GetHandleError(my_handle);
-    }
+    WebRtcNs_Analyze(my_handle, audio->split_bands_const_f(i)[kBand0To8kHz]);
   }
 #endif
   return apm_->kNoError;
 }

 int NoiseSuppressionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
-  int err = apm_->kNoError;

   if (!is_component_enabled()) {
     return apm_->kNoError;
   }
@@ -88,24 +82,17 @@ int NoiseSuppressionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
   for (int i = 0; i < num_handles(); ++i) {
     Handle* my_handle = static_cast<Handle*>(handle(i));
 #if defined(WEBRTC_NS_FLOAT)
-    err = WebRtcNs_Process(my_handle,
-                           audio->split_data_f(i, kBand0To8kHz),
-                           audio->split_data_f(i, kBand8To16kHz),
-                           audio->split_data_f(i, kBand0To8kHz),
-                           audio->split_data_f(i, kBand8To16kHz));
+    WebRtcNs_Process(my_handle,
+                     audio->split_bands_const_f(i),
+                     audio->num_bands(),
+                     audio->split_bands_f(i));
 #elif defined(WEBRTC_NS_FIXED)
-    err = WebRtcNsx_Process(my_handle,
-                            audio->split_data(i, kBand0To8kHz),
-                            audio->split_data(i, kBand8To16kHz),
-                            audio->split_data(i, kBand0To8kHz),
-                            audio->split_data(i, kBand8To16kHz));
+    WebRtcNsx_Process(my_handle,
+                      audio->split_bands_const(i),
+                      audio->num_bands(),
+                      audio->split_bands(i));
 #endif
-
-    if (err != apm_->kNoError) {
-      return GetHandleError(my_handle);
-    }
   }

   return apm_->kNoError;
 }

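Putting the new interface together at 48 kHz, end to end. This is a hedged sketch: the Create/Init/Free signatures are assumed from the same era of the tree and are not part of this diff; only Analyze/Process are. Note the suppressor also stops reporting errors here, asserting on misuse instead, which is why the GetHandleError paths above disappear.

    #include <string.h>
    #include "webrtc/modules/audio_processing/ns/include/noise_suppression.h"

    int main() {
      NsHandle* ns = NULL;
      // Create/Init/Free signatures assumed from this era of the tree.
      if (WebRtcNs_Create(&ns) != 0 || WebRtcNs_Init(ns, 48000) != 0)
        return -1;
      // 10 ms at 48 kHz = 480 samples, split into 3 bands of 160.
      float b0[160], b1[160], b2[160];
      memset(b0, 0, sizeof(b0));
      memset(b1, 0, sizeof(b1));
      memset(b2, 0, sizeof(b2));
      const float* in[3] = {b0, b1, b2};
      float o0[160], o1[160], o2[160];
      float* out[3] = {o0, o1, o2};
      WebRtcNs_Analyze(ns, in[0]);       // spectral analysis uses the low band
      WebRtcNs_Process(ns, in, 3, out);  // gains applied to all three bands
      WebRtcNs_Free(ns);
      return 0;
    }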
@@ -14,6 +14,7 @@
 #define BLOCKL_MAX          160  // max processing block length: 160
 #define ANAL_BLOCKL_MAX     256  // max analysis block length: 256
 #define HALF_ANAL_BLOCKL    129  // half max analysis block length + 1
+#define NUM_HIGH_BANDS_MAX  2    // max number of high bands: 2

 #define QUANTILE            (float)0.25

@@ -89,11 +89,8 @@ int WebRtcNs_set_policy(NsHandle* NS_inst, int mode);
 *
 * Output:
 *      - NS_inst       : Updated NS instance
-*
-* Return value         :  0 - OK
-*                        -1 - Error
 */
-int WebRtcNs_Analyze(NsHandle* NS_inst, float* spframe);
+void WebRtcNs_Analyze(NsHandle* NS_inst, const float* spframe);

 /*
 * This functions does Noise Suppression for the inserted speech frame. The
@@ -101,23 +98,17 @@ int WebRtcNs_Analyze(NsHandle* NS_inst, float* spframe);
 *
 * Input
 *      - NS_inst       : Noise suppression instance.
-*      - spframe       : Pointer to speech frame buffer for L band
-*      - spframe_H     : Pointer to speech frame buffer for H band
-*      - fs            : sampling frequency
+*      - spframe       : Pointer to speech frame buffer for each band
+*      - num_bands     : Number of bands
 *
 * Output:
 *      - NS_inst       : Updated NS instance
-*      - outframe      : Pointer to output frame for L band
-*      - outframe_H    : Pointer to output frame for H band
-*
-* Return value         :  0 - OK
-*                        -1 - Error
+*      - outframe      : Pointer to output frame for each band
 */
-int WebRtcNs_Process(NsHandle* NS_inst,
-                     float* spframe,
-                     float* spframe_H,
-                     float* outframe,
-                     float* outframe_H);
+void WebRtcNs_Process(NsHandle* NS_inst,
+                      const float* const* spframe,
+                      int num_bands,
+                      float* const* outframe);

 /* Returns the internally used prior speech probability of the current frame.
 * There is a frequency bin based one as well, with which this should not be
@@ -84,23 +84,17 @@ int WebRtcNsx_set_policy(NsxHandle* nsxInst, int mode);
 *
 * Input
 *      - nsxInst       : NSx instance. Needs to be initiated before call.
-*      - speechFrame   : Pointer to speech frame buffer for L band
-*      - speechFrameHB : Pointer to speech frame buffer for H band
-*      - fs            : sampling frequency
+*      - speechFrame   : Pointer to speech frame buffer for each band
+*      - num_bands     : Number of bands
 *
 * Output:
 *      - nsxInst       : Updated NSx instance
-*      - outFrame      : Pointer to output frame for L band
-*      - outFrameHB    : Pointer to output frame for H band
-*
-* Return value         :  0 - OK
-*                        -1 - Error
+*      - outFrame      : Pointer to output frame for each band
 */
-int WebRtcNsx_Process(NsxHandle* nsxInst,
-                      short* speechFrame,
-                      short* speechFrameHB,
-                      short* outFrame,
-                      short* outFrameHB);
+void WebRtcNsx_Process(NsxHandle* nsxInst,
+                       const short* const* speechFrame,
+                       int num_bands,
+                       short* const* outFrame);

 #ifdef __cplusplus
 }
@@ -42,14 +42,15 @@ int WebRtcNs_set_policy(NsHandle* NS_inst, int mode) {
   return WebRtcNs_set_policy_core((NSinst_t*) NS_inst, mode);
 }

-int WebRtcNs_Analyze(NsHandle* NS_inst, float* spframe) {
-  return WebRtcNs_AnalyzeCore((NSinst_t*) NS_inst, spframe);
+void WebRtcNs_Analyze(NsHandle* NS_inst, const float* spframe) {
+  WebRtcNs_AnalyzeCore((NSinst_t*) NS_inst, spframe);
 }

-int WebRtcNs_Process(NsHandle* NS_inst, float* spframe, float* spframe_H,
-                     float* outframe, float* outframe_H) {
-  return WebRtcNs_ProcessCore(
-      (NSinst_t*) NS_inst, spframe, spframe_H, outframe, outframe_H);
+void WebRtcNs_Process(NsHandle* NS_inst,
+                      const float* const* spframe,
+                      int num_bands,
+                      float* const* outframe) {
+  WebRtcNs_ProcessCore((NSinst_t*)NS_inst, spframe, num_bands, outframe);
 }

 float WebRtcNs_prior_speech_probability(NsHandle* handle) {
@@ -45,9 +45,9 @@ int WebRtcNsx_set_policy(NsxHandle* nsxInst, int mode) {
   return WebRtcNsx_set_policy_core((NsxInst_t*)nsxInst, mode);
 }

-int WebRtcNsx_Process(NsxHandle* nsxInst, short* speechFrame,
-                      short* speechFrameHB, short* outFrame,
-                      short* outFrameHB) {
-  return WebRtcNsx_ProcessCore(
-      (NsxInst_t*)nsxInst, speechFrame, speechFrameHB, outFrame, outFrameHB);
+void WebRtcNsx_Process(NsxHandle* nsxInst,
+                       const short* const* speechFrame,
+                       int num_bands,
+                       short* const* outFrame) {
+  WebRtcNsx_ProcessCore((NsxInst_t*)nsxInst, speechFrame, num_bands, outFrame);
 }
@@ -79,24 +79,18 @@ int WebRtcNs_InitCore(NSinst_t* self, uint32_t fs) {
   }

   // Initialization of struct.
-  if (fs == 8000 || fs == 16000 || fs == 32000) {
+  if (fs == 8000 || fs == 16000 || fs == 32000 || fs == 48000) {
     self->fs = fs;
   } else {
     return -1;
   }
   self->windShift = 0;
+  // We only support 10ms frames.
   if (fs == 8000) {
-    // We only support 10ms frames.
     self->blockLen = 80;
     self->anaLen = 128;
     self->window = kBlocks80w128;
-  } else if (fs == 16000) {
-    // We only support 10ms frames.
-    self->blockLen = 160;
-    self->anaLen = 256;
-    self->window = kBlocks160w256;
-  } else if (fs == 32000) {
-    // We only support 10ms frames.
+  } else {
     self->blockLen = 160;
     self->anaLen = 256;
     self->window = kBlocks160w256;
@@ -113,7 +107,9 @@ int WebRtcNs_InitCore(NSinst_t* self, uint32_t fs) {
   memset(self->syntBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);

   // For HB processing.
-  memset(self->dataBufHB, 0, sizeof(float) * ANAL_BLOCKL_MAX);
+  memset(self->dataBufHB,
+         0,
+         sizeof(float) * NUM_HIGH_BANDS_MAX * ANAL_BLOCKL_MAX);

   // For quantile noise estimation.
   memset(self->quantile, 0, sizeof(float) * HALF_ANAL_BLOCKL);
@@ -1041,7 +1037,7 @@ int WebRtcNs_set_policy_core(NSinst_t* self, int mode) {
   return 0;
 }

-int WebRtcNs_AnalyzeCore(NSinst_t* self, float* speechFrame) {
+void WebRtcNs_AnalyzeCore(NSinst_t* self, const float* speechFrame) {
   int i;
   const int kStartBand = 5;  // Skip first frequency bins during estimation.
   int updateParsFlag;
@@ -1062,9 +1058,7 @@ int WebRtcNs_AnalyzeCore(NSinst_t* self, float* speechFrame) {
   float parametric_num = 0.0;

   // Check that initiation has been done.
-  if (self->initFlag != 1) {
-    return (-1);
-  }
+  assert(self->initFlag == 1);
   updateParsFlag = self->modelUpdatePars[0];

   // Update analysis buffer for L band.
@@ -1081,7 +1075,7 @@ int WebRtcNs_AnalyzeCore(NSinst_t* self, float* speechFrame) {
     // Depending on the duration of the inactive signal it takes a
     // considerable amount of time for the system to learn what is noise and
     // what is speech.
-    return 0;
+    return;
   }

   self->blockInd++;  // Update the block index only when we process a block.
@@ -1181,18 +1175,15 @@ int WebRtcNs_AnalyzeCore(NSinst_t* self, float* speechFrame) {
   // Keep track of noise spectrum for next frame.
   memcpy(self->noise, noise, sizeof(*noise) * self->magnLen);
   memcpy(self->magnPrevAnalyze, magn, sizeof(*magn) * self->magnLen);
-
-  return 0;
 }

-int WebRtcNs_ProcessCore(NSinst_t* self,
-                         float* speechFrame,
-                         float* speechFrameHB,
-                         float* outFrame,
-                         float* outFrameHB) {
+void WebRtcNs_ProcessCore(NSinst_t* self,
+                          const float* const* speechFrame,
+                          int num_bands,
+                          float* const* outFrame) {
   // Main routine for noise reduction.
   int flagHB = 0;
-  int i;
+  int i, j;

   float energy1, energy2, gain, factor, factor1, factor2;
   float fout[BLOCKL_MAX];
@@ -1211,14 +1202,16 @@ int WebRtcNs_ProcessCore(NSinst_t* self,
   float sumMagnAnalyze, sumMagnProcess;

   // Check that initiation has been done.
-  if (self->initFlag != 1) {
-    return (-1);
-  }
-  // Check for valid pointers based on sampling rate.
-  if (self->fs == 32000) {
-    if (speechFrameHB == NULL) {
-      return -1;
-    }
+  assert(self->initFlag == 1);
+  assert((num_bands - 1) <= NUM_HIGH_BANDS_MAX);
+
+  const float* const* speechFrameHB = NULL;
+  float* const* outFrameHB = NULL;
+  int num_high_bands = 0;
+  if (num_bands > 1) {
+    speechFrameHB = &speechFrame[1];
+    outFrameHB = &outFrame[1];
+    num_high_bands = num_bands - 1;
     flagHB = 1;
     // Range for averaging low band quantities for H band gain.
     deltaBweHB = (int)self->magnLen / 4;
@@ -1226,11 +1219,16 @@ int WebRtcNs_ProcessCore(NSinst_t* self,
   }

   // Update analysis buffer for L band.
-  UpdateBuffer(speechFrame, self->blockLen, self->anaLen, self->dataBuf);
+  UpdateBuffer(speechFrame[0], self->blockLen, self->anaLen, self->dataBuf);

   if (flagHB == 1) {
-    // Update analysis buffer for H band.
-    UpdateBuffer(speechFrameHB, self->blockLen, self->anaLen, self->dataBufHB);
+    // Update analysis buffer for H bands.
+    for (i = 0; i < num_high_bands; ++i) {
+      UpdateBuffer(speechFrameHB[i],
+                   self->blockLen,
+                   self->anaLen,
+                   self->dataBufHB[i]);
+    }
   }

   Windowing(self->window, self->dataBuf, self->anaLen, winData);
@@ -1245,16 +1243,21 @@ int WebRtcNs_ProcessCore(NSinst_t* self,
     UpdateBuffer(NULL, self->blockLen, self->anaLen, self->syntBuf);

     for (i = 0; i < self->blockLen; ++i)
-      outFrame[i] =
+      outFrame[0][i] =
           WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fout[i], WEBRTC_SPL_WORD16_MIN);

     // For time-domain gain of HB.
-    if (flagHB == 1)
-      for (i = 0; i < self->blockLen; ++i)
-        outFrameHB[i] = WEBRTC_SPL_SAT(
-            WEBRTC_SPL_WORD16_MAX, self->dataBufHB[i], WEBRTC_SPL_WORD16_MIN);
+    if (flagHB == 1) {
+      for (i = 0; i < num_high_bands; ++i) {
+        for (j = 0; j < self->blockLen; ++j) {
+          outFrameHB[i][j] = WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
+                                            self->dataBufHB[i][j],
+                                            WEBRTC_SPL_WORD16_MIN);
+        }
+      }
+    }

-    return 0;
+    return;
   }

   FFT(self, winData, self->anaLen, self->magnLen, real, imag, magn);
@@ -1349,7 +1352,7 @@ int WebRtcNs_ProcessCore(NSinst_t* self,
   UpdateBuffer(NULL, self->blockLen, self->anaLen, self->syntBuf);

   for (i = 0; i < self->blockLen; ++i)
-    outFrame[i] =
+    outFrame[0][i] =
         WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fout[i], WEBRTC_SPL_WORD16_MIN);

   // For time-domain gain of HB.
@@ -1397,12 +1400,13 @@ int WebRtcNs_ProcessCore(NSinst_t* self,
       gainTimeDomainHB = 1.f;
     }
     // Apply gain.
-    for (i = 0; i < self->blockLen; i++) {
-      float o = gainTimeDomainHB * self->dataBufHB[i];
-      outFrameHB[i] =
-          WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, o, WEBRTC_SPL_WORD16_MIN);
+    for (i = 0; i < num_high_bands; ++i) {
+      for (j = 0; j < self->blockLen; j++) {
+        outFrameHB[i][j] =
+            WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
+                           gainTimeDomainHB * self->dataBufHB[i][j],
+                           WEBRTC_SPL_WORD16_MIN);
+      }
     }
   }  // End of H band gain computation.
-
-  return 0;
 }
@@ -108,7 +108,8 @@ typedef struct NSinst_t_ {
   int histSpecDiff[HIST_PAR_EST];
   // Quantities for high band estimate.
   float speechProb[HALF_ANAL_BLOCKL];  // Final speech/noise prob: prior + LRT.
-  float dataBufHB[ANAL_BLOCKL_MAX];  // Buffering data for HB.
+  // Buffering data for HB.
+  float dataBufHB[NUM_HIGH_BANDS_MAX][ANAL_BLOCKL_MAX];

 } NSinst_t;

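Since a C array of arrays is contiguous, the single memset in WebRtcNs_InitCore shown earlier still clears the whole widened buffer; the total size is just the product of the two dimensions. A self-contained check, mirroring the constants in the diff:

    #include <assert.h>
    #include <string.h>

    #define NUM_HIGH_BANDS_MAX 2
    #define ANAL_BLOCKL_MAX 256

    int main() {
      float dataBufHB[NUM_HIGH_BANDS_MAX][ANAL_BLOCKL_MAX];
      // A 2-D array is one contiguous block, so one memset covers both bands.
      assert(sizeof(dataBufHB) ==
             sizeof(float) * NUM_HIGH_BANDS_MAX * ANAL_BLOCKL_MAX);
      memset(dataBufHB, 0, sizeof(float) * NUM_HIGH_BANDS_MAX * ANAL_BLOCKL_MAX);
      return 0;
    }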
@@ -161,11 +162,8 @@ int WebRtcNs_set_policy_core(NSinst_t* self, int mode);
 *
 * Output:
 *      - self          : Updated instance
-*
-* Return value         :  0 - OK
-*                        -1 - Error
 */
-int WebRtcNs_AnalyzeCore(NSinst_t* self, float* speechFrame);
+void WebRtcNs_AnalyzeCore(NSinst_t* self, const float* speechFrame);

 /****************************************************************************
 * WebRtcNs_ProcessCore
@@ -174,22 +172,17 @@ int WebRtcNs_AnalyzeCore(NSinst_t* self, float* speechFrame);
 *
 * Input:
 *      - self          : Instance that should be initialized
-*      - inFrameLow    : Input speech frame for lower band
-*      - inFrameHigh   : Input speech frame for higher band
+*      - inFrame       : Input speech frame for each band
+*      - num_bands     : Number of bands
 *
 * Output:
 *      - self          : Updated instance
-*      - outFrameLow   : Output speech frame for lower band
-*      - outFrameHigh  : Output speech frame for higher band
-*
-* Return value         :  0 - OK
-*                        -1 - Error
+*      - outFrame      : Output speech frame for each band
 */
-int WebRtcNs_ProcessCore(NSinst_t* self,
-                         float* inFrameLow,
-                         float* inFrameHigh,
-                         float* outFrameLow,
-                         float* outFrameHigh);
+void WebRtcNs_ProcessCore(NSinst_t* self,
+                          const float* const* inFrame,
+                          int num_bands,
+                          float* const* outFrame);

 #ifdef __cplusplus
 }
@@ -637,7 +637,7 @@ int32_t WebRtcNsx_InitCore(NsxInst_t* inst, uint32_t fs) {
   //

   // Initialization of struct
-  if (fs == 8000 || fs == 16000 || fs == 32000) {
+  if (fs == 8000 || fs == 16000 || fs == 32000 || fs == 48000) {
     inst->fs = fs;
   } else {
     return -1;
@@ -651,15 +651,7 @@ int32_t WebRtcNsx_InitCore(NsxInst_t* inst, uint32_t fs) {
     inst->thresholdLogLrt = 131072;  //default threshold for LRT feature
     inst->maxLrt = 0x0040000;
     inst->minLrt = 52429;
-  } else if (fs == 16000) {
-    inst->blockLen10ms = 160;
-    inst->anaLen = 256;
-    inst->stages = 8;
-    inst->window = kBlocks160w256x;
-    inst->thresholdLogLrt = 212644;  //default threshold for LRT feature
-    inst->maxLrt = 0x0080000;
-    inst->minLrt = 104858;
-  } else if (fs == 32000) {
+  } else {
     inst->blockLen10ms = 160;
     inst->anaLen = 256;
     inst->stages = 8;
@@ -683,7 +675,8 @@ int32_t WebRtcNsx_InitCore(NsxInst_t* inst, uint32_t fs) {
   WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer, ANAL_BLOCKL_MAX);

   // for HB processing
-  WebRtcSpl_ZerosArrayW16(inst->dataBufHBFX, ANAL_BLOCKL_MAX);
+  WebRtcSpl_ZerosArrayW16(inst->dataBufHBFX[0],
+                          NUM_HIGH_BANDS_MAX * ANAL_BLOCKL_MAX);
   // for quantile noise estimation
   WebRtcSpl_ZerosArrayW16(inst->noiseEstQuantile, HALF_ANAL_BLOCKL);
   for (i = 0; i < SIMULT * HALF_ANAL_BLOCKL; i++) {
@@ -1502,8 +1495,10 @@ void WebRtcNsx_DataSynthesis(NsxInst_t* inst, short* outFrame) {
   WebRtcNsx_SynthesisUpdate(inst, outFrame, gainFactor);
 }

-int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFrameHB,
-                          short* outFrame, short* outFrameHB) {
+void WebRtcNsx_ProcessCore(NsxInst_t* inst,
+                           const short* const* speechFrame,
+                           int num_bands,
+                           short* const* outFrame) {
   // main routine for noise suppression

   uint32_t tmpU32no1, tmpU32no2, tmpU32no3;
@@ -1535,7 +1530,7 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram
   int16_t avgProbSpeechHB, gainModHB, avgFilterGainHB, gainTimeDomainHB;
   int16_t pink_noise_exp_avg = 0;

-  int i;
+  int i, j;
   int nShifts, postShifts;
   int norm32no1, norm32no2;
   int flag, sign;
@@ -1553,37 +1548,46 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram
 #ifdef NS_FILEDEBUG
   if (fwrite(spframe, sizeof(short),
              inst->blockLen10ms, inst->infile) != inst->blockLen10ms) {
-    return -1;
+    assert(false);
   }
 #endif

   // Check that initialization has been done
-  if (inst->initFlag != 1) {
-    return -1;
-  }
-  // Check for valid pointers based on sampling rate
-  if ((inst->fs == 32000) && (speechFrameHB == NULL)) {
-    return -1;
+  assert(inst->initFlag == 1);
+  assert((num_bands - 1) <= NUM_HIGH_BANDS_MAX);
+
+  const short* const* speechFrameHB = NULL;
+  short* const* outFrameHB = NULL;
+  int num_high_bands = 0;
+  if (num_bands > 1) {
+    speechFrameHB = &speechFrame[1];
+    outFrameHB = &outFrame[1];
+    num_high_bands = num_bands - 1;
   }

   // Store speechFrame and transform to frequency domain
-  WebRtcNsx_DataAnalysis(inst, speechFrame, magnU16);
+  WebRtcNsx_DataAnalysis(inst, (short*)speechFrame[0], magnU16);

   if (inst->zeroInputSignal) {
-    WebRtcNsx_DataSynthesis(inst, outFrame);
+    WebRtcNsx_DataSynthesis(inst, outFrame[0]);

-    if (inst->fs == 32000) {
+    if (num_bands > 1) {
       // update analysis buffer for H band
       // append new data to buffer FX
-      WEBRTC_SPL_MEMCPY_W16(inst->dataBufHBFX, inst->dataBufHBFX + inst->blockLen10ms,
-                            inst->anaLen - inst->blockLen10ms);
-      WEBRTC_SPL_MEMCPY_W16(inst->dataBufHBFX + inst->anaLen - inst->blockLen10ms,
-                            speechFrameHB, inst->blockLen10ms);
-      for (i = 0; i < inst->blockLen10ms; i++) {
-        outFrameHB[i] = inst->dataBufHBFX[i];  // Q0
+      for (i = 0; i < num_high_bands; ++i) {
+        WEBRTC_SPL_MEMCPY_W16(inst->dataBufHBFX[i],
+                              inst->dataBufHBFX[i] + inst->blockLen10ms,
+                              inst->anaLen - inst->blockLen10ms);
+        WEBRTC_SPL_MEMCPY_W16(
+            inst->dataBufHBFX[i] + inst->anaLen - inst->blockLen10ms,
+            speechFrameHB[i],
+            inst->blockLen10ms);
+        for (j = 0; j < inst->blockLen10ms; j++) {
+          outFrameHB[i][j] = inst->dataBufHBFX[i][j];  // Q0
+        }
       }
     }  // end of H band gain computation
-    return 0;
+    return;
   }

   // Update block index when we have something to process
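The speechFrameHB = &speechFrame[1] trick above gives the core a view of just the high bands without copying: the caller's band-pointer array is reused, offset by one. In isolation:

    #include <stdio.h>

    int main() {
      short b0[160] = {0}, b1[160] = {0}, b2[160] = {0};
      const short* bands[3] = {b0, b1, b2};
      const int num_bands = 3;
      // The high-band view is just the tail of the band-pointer array.
      const short* const* high_bands = &bands[1];  // bands 1..num_bands-1
      const int num_high_bands = num_bands - 1;
      printf("%d high bands; first is b1: %d\n",
             num_high_bands, high_bands[0] == b1);
      return 0;
    }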
@@ -2022,21 +2026,28 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram
     }
   }

-  WebRtcNsx_DataSynthesis(inst, outFrame);
+  WebRtcNsx_DataSynthesis(inst, outFrame[0]);
 #ifdef NS_FILEDEBUG
   if (fwrite(outframe, sizeof(short),
              inst->blockLen10ms, inst->outfile) != inst->blockLen10ms) {
-    return -1;
+    assert(false);
   }
 #endif

   //for H band:
   // only update data buffer, then apply time-domain gain is applied derived from L band
-  if (inst->fs == 32000) {
+  if (num_bands > 1) {
     // update analysis buffer for H band
     // append new data to buffer FX
-    WEBRTC_SPL_MEMCPY_W16(inst->dataBufHBFX, inst->dataBufHBFX + inst->blockLen10ms, inst->anaLen - inst->blockLen10ms);
-    WEBRTC_SPL_MEMCPY_W16(inst->dataBufHBFX + inst->anaLen - inst->blockLen10ms, speechFrameHB, inst->blockLen10ms);
+    for (i = 0; i < num_high_bands; ++i) {
+      WEBRTC_SPL_MEMCPY_W16(inst->dataBufHBFX[i],
+                            inst->dataBufHBFX[i] + inst->blockLen10ms,
+                            inst->anaLen - inst->blockLen10ms);
+      WEBRTC_SPL_MEMCPY_W16(
+          inst->dataBufHBFX[i] + inst->anaLen - inst->blockLen10ms,
+          speechFrameHB[i],
+          inst->blockLen10ms);
+    }
     // range for averaging low band quantities for H band gain

     gainTimeDomainHB = 16384;  // 16384 = Q14(1.0)
@@ -2094,11 +2105,13 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram


     //apply gain
-    for (i = 0; i < inst->blockLen10ms; i++) {
-      outFrameHB[i]
-        = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(gainTimeDomainHB, inst->dataBufHBFX[i], 14);  // Q0
+    for (i = 0; i < num_high_bands; ++i) {
+      for (j = 0; j < inst->blockLen10ms; j++) {
+        outFrameHB[i][j] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(
+            gainTimeDomainHB,
+            inst->dataBufHBFX[i][j],
+            14);  // Q0
+      }
     }
   }  // end of H band gain computation
-
-  return 0;
 }
@@ -93,7 +93,7 @@ typedef struct NsxInst_t_ {
   int16_t histSpecDiff[HIST_PAR_EST];

   // Quantities for high band estimate.
-  int16_t dataBufHBFX[ANAL_BLOCKL_MAX];  // Q0
+  int16_t dataBufHBFX[NUM_HIGH_BANDS_MAX][ANAL_BLOCKL_MAX];

   int qNoise;
   int prevQNoise;
@@ -155,25 +155,20 @@ int WebRtcNsx_set_policy_core(NsxInst_t* inst, int mode);
 *
 * Input:
 *      - inst          : Instance that should be initialized
-*      - inFrameLow    : Input speech frame for lower band
-*      - inFrameHigh   : Input speech frame for higher band
+*      - inFrame       : Input speech frame for each band
+*      - num_bands     : Number of bands
 *
 * Output:
 *      - inst          : Updated instance
-*      - outFrameLow   : Output speech frame for lower band
-*      - outFrameHigh  : Output speech frame for higher band
-*
-* Return value         :  0 - OK
-*                        -1 - Error
+*      - outFrame      : Output speech frame for each band
 */
-int WebRtcNsx_ProcessCore(NsxInst_t* inst,
-                          short* inFrameLow,
-                          short* inFrameHigh,
-                          short* outFrameLow,
-                          short* outFrameHigh);
+void WebRtcNsx_ProcessCore(NsxInst_t* inst,
+                           const short* const* inFrame,
+                           int num_bands,
+                           short* const* outFrame);

 /****************************************************************************
 * Some function pointers, for internal functions shared by ARM NEON and
 * generic C code.
 */
 // Noise Estimation.
@@ -13,6 +13,7 @@

 #define ANAL_BLOCKL_MAX     256  /* Max analysis block length */
 #define HALF_ANAL_BLOCKL    129  /* Half max analysis block length + 1 */
+#define NUM_HIGH_BANDS_MAX  2    /* Max number of high bands */
 #define SIMULT              3
 #define END_STARTUP_LONG    200
 #define END_STARTUP_SHORT   50