Support arbitrary input/output rates and downmixing in AudioProcessing.

Select "processing" rates based on the input and output sampling rates.
Resample the input streams to those rates and, if necessary, resample
again to the output rate.

- Remove deprecated stream format APIs.
- Remove deprecated device sample rate APIs.
- Add a ChannelBuffer class to help manage deinterleaved channels.
- Clean up the splitting filter state.
- Add a unit test that verifies the output against known-working
  native-format output.
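
For illustration, a minimal sketch of driving the new float interface
(function and buffer names are examples only; the signature matches the
ProcessStream() declaration in this change):

    #include "webrtc/modules/audio_processing/include/audio_processing.h"

    // Push one 10 ms chunk through APM: stereo 48 kHz in, mono 16 kHz out.
    void ProcessCaptureChunk(webrtc::AudioProcessing* apm,
                             const float* const* src,  // 2 ch x 480 samples
                             float* const* dest) {     // 1 ch x 160 samples
      apm->ProcessStream(src,
                         480,    // samples per channel: 10 ms at 48 kHz
                         48000,  // input sample rate
                         webrtc::AudioProcessing::kStereo,
                         16000,  // output sample rate
                         webrtc::AudioProcessing::kMono,
                         dest);
    }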

BUG=2894
R=aluebs@webrtc.org, bjornv@webrtc.org, xians@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/9919004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@5959 4adac7df-926f-26a2-2b94-8c16560cd09d
andrew@webrtc.org 2014-04-22 21:00:04 +00:00
parent 34fe0153b9
commit ddbb8a2c24
23 changed files with 1291 additions and 693 deletions

View File

@@ -11,6 +1,7 @@
#include "webrtc/modules/audio_processing/audio_buffer.h"
#include "webrtc/common_audio/include/audio_util.h"
#include "webrtc/common_audio/resampler/push_sinc_resampler.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
namespace webrtc {
@@ -22,76 +23,166 @@ enum {
kSamplesPer32kHzChannel = 320
};
void StereoToMono(const int16_t* left, const int16_t* right,
int16_t* out, int samples_per_channel) {
assert(left != NULL && right != NULL && out != NULL);
for (int i = 0; i < samples_per_channel; i++)
out[i] = (static_cast<int32_t>(left[i]) +
static_cast<int32_t>(right[i])) >> 1;
void StereoToMono(const float* left, const float* right, float* out,
int samples_per_channel) {
for (int i = 0; i < samples_per_channel; ++i) {
out[i] = (left[i] + right[i]) / 2;
}
}
void StereoToMono(const int16_t* left, const int16_t* right, int16_t* out,
int samples_per_channel) {
for (int i = 0; i < samples_per_channel; i++)
out[i] = (left[i] + right[i]) >> 1;
}
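// Note: the int16_t operands promote to int before the addition, so the sum
// cannot overflow; the >> 1 then yields the floored average.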
} // namespace
struct AudioChannel {
AudioChannel() {
memset(data, 0, sizeof(data));
class SplitChannelBuffer {
public:
SplitChannelBuffer(int samples_per_split_channel, int num_channels)
: low_(samples_per_split_channel, num_channels),
high_(samples_per_split_channel, num_channels) {
}
~SplitChannelBuffer() {}
int16_t data[kSamplesPer32kHzChannel];
int16_t* low_channel(int i) { return low_.channel(i); }
int16_t* high_channel(int i) { return high_.channel(i); }
private:
ChannelBuffer<int16_t> low_;
ChannelBuffer<int16_t> high_;
};
struct SplitAudioChannel {
SplitAudioChannel() {
memset(low_pass_data, 0, sizeof(low_pass_data));
memset(high_pass_data, 0, sizeof(high_pass_data));
memset(analysis_filter_state1, 0, sizeof(analysis_filter_state1));
memset(analysis_filter_state2, 0, sizeof(analysis_filter_state2));
memset(synthesis_filter_state1, 0, sizeof(synthesis_filter_state1));
memset(synthesis_filter_state2, 0, sizeof(synthesis_filter_state2));
}
int16_t low_pass_data[kSamplesPer16kHzChannel];
int16_t high_pass_data[kSamplesPer16kHzChannel];
int32_t analysis_filter_state1[6];
int32_t analysis_filter_state2[6];
int32_t synthesis_filter_state1[6];
int32_t synthesis_filter_state2[6];
};
// TODO(andrew): check range of input parameters?
AudioBuffer::AudioBuffer(int max_num_channels,
int samples_per_channel)
: max_num_channels_(max_num_channels),
num_channels_(0),
AudioBuffer::AudioBuffer(int input_samples_per_channel,
int num_input_channels,
int process_samples_per_channel,
int num_process_channels,
int output_samples_per_channel)
: input_samples_per_channel_(input_samples_per_channel),
num_input_channels_(num_input_channels),
proc_samples_per_channel_(process_samples_per_channel),
num_proc_channels_(num_process_channels),
output_samples_per_channel_(output_samples_per_channel),
samples_per_split_channel_(proc_samples_per_channel_),
num_mixed_channels_(0),
num_mixed_low_pass_channels_(0),
data_was_mixed_(false),
samples_per_channel_(samples_per_channel),
samples_per_split_channel_(samples_per_channel),
reference_copied_(false),
activity_(AudioFrame::kVadUnknown),
is_muted_(false),
data_(NULL),
channels_(NULL),
split_channels_(NULL),
mixed_channels_(NULL),
mixed_low_pass_channels_(NULL),
low_pass_reference_channels_(NULL) {
channels_.reset(new AudioChannel[max_num_channels_]);
mixed_channels_.reset(new AudioChannel[max_num_channels_]);
mixed_low_pass_channels_.reset(new AudioChannel[max_num_channels_]);
low_pass_reference_channels_.reset(new AudioChannel[max_num_channels_]);
channels_(new ChannelBuffer<int16_t>(proc_samples_per_channel_,
num_proc_channels_)) {
assert(input_samples_per_channel_ > 0);
assert(proc_samples_per_channel_ > 0);
assert(output_samples_per_channel_ > 0);
assert(num_input_channels_ > 0 && num_input_channels_ <= 2);
assert(num_proc_channels_ <= num_input_channels);
if (samples_per_channel_ == kSamplesPer32kHzChannel) {
split_channels_.reset(new SplitAudioChannel[max_num_channels_]);
if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
input_buffer_.reset(new ChannelBuffer<float>(input_samples_per_channel_,
num_proc_channels_));
}
if (input_samples_per_channel_ != proc_samples_per_channel_ ||
output_samples_per_channel_ != proc_samples_per_channel_) {
// Create an intermediate buffer for resampling.
process_buffer_.reset(new ChannelBuffer<float>(proc_samples_per_channel_,
num_proc_channels_));
}
if (input_samples_per_channel_ != proc_samples_per_channel_) {
input_resamplers_.reserve(num_proc_channels_);
for (int i = 0; i < num_proc_channels_; ++i) {
input_resamplers_.push_back(
new PushSincResampler(input_samples_per_channel_,
proc_samples_per_channel_));
}
}
if (output_samples_per_channel_ != proc_samples_per_channel_) {
output_resamplers_.reserve(num_proc_channels_);
for (int i = 0; i < num_proc_channels_; ++i) {
output_resamplers_.push_back(
new PushSincResampler(proc_samples_per_channel_,
output_samples_per_channel_));
}
}
if (proc_samples_per_channel_ == kSamplesPer32kHzChannel) {
samples_per_split_channel_ = kSamplesPer16kHzChannel;
split_channels_.reset(new SplitChannelBuffer(samples_per_split_channel_,
num_proc_channels_));
filter_states_.reset(new SplitFilterStates[num_proc_channels_]);
}
}
void AudioBuffer::CopyFrom(const float* const* data,
int samples_per_channel,
AudioProcessing::ChannelLayout layout) {
assert(samples_per_channel == input_samples_per_channel_);
assert(ChannelsFromLayout(layout) == num_input_channels_);
InitForNewData();
// Downmix.
const float* const* data_ptr = data;
if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
StereoToMono(data[0],
data[1],
input_buffer_->channel(0),
input_samples_per_channel_);
data_ptr = input_buffer_->channels();
}
// Resample.
if (input_samples_per_channel_ != proc_samples_per_channel_) {
for (int i = 0; i < num_proc_channels_; ++i) {
input_resamplers_[i]->Resample(data_ptr[i],
input_samples_per_channel_,
process_buffer_->channel(i),
proc_samples_per_channel_);
}
data_ptr = process_buffer_->channels();
}
// Convert to int16.
for (int i = 0; i < num_proc_channels_; ++i) {
ScaleAndRoundToInt16(data_ptr[i], proc_samples_per_channel_,
channels_->channel(i));
}
}
void AudioBuffer::CopyTo(int samples_per_channel,
AudioProcessing::ChannelLayout layout,
float* const* data) {
assert(samples_per_channel == output_samples_per_channel_);
assert(ChannelsFromLayout(layout) == num_proc_channels_);
// Convert to float.
float* const* data_ptr = data;
if (output_samples_per_channel_ != proc_samples_per_channel_) {
// Convert to an intermediate buffer for subsequent resampling.
data_ptr = process_buffer_->channels();
}
for (int i = 0; i < num_proc_channels_; ++i) {
ScaleToFloat(channels_->channel(i), proc_samples_per_channel_, data_ptr[i]);
}
// Resample.
if (output_samples_per_channel_ != proc_samples_per_channel_) {
for (int i = 0; i < num_proc_channels_; ++i) {
output_resamplers_[i]->Resample(data_ptr[i],
proc_samples_per_channel_,
data[i],
output_samples_per_channel_);
}
}
}
AudioBuffer::~AudioBuffer() {}
void AudioBuffer::InitForNewData(int num_channels) {
num_channels_ = num_channels;
void AudioBuffer::InitForNewData() {
data_ = NULL;
data_was_mixed_ = false;
num_mixed_channels_ = 0;
@@ -102,71 +193,56 @@ void AudioBuffer::InitForNewData(int num_channels) {
}
int16_t* AudioBuffer::data(int channel) const {
assert(channel >= 0 && channel < num_channels_);
assert(channel >= 0 && channel < num_proc_channels_);
if (data_ != NULL) {
return data_;
}
return channels_[channel].data;
return channels_->channel(channel);
}
int16_t* AudioBuffer::low_pass_split_data(int channel) const {
assert(channel >= 0 && channel < num_channels_);
assert(channel >= 0 && channel < num_proc_channels_);
if (split_channels_.get() == NULL) {
return data(channel);
}
return split_channels_[channel].low_pass_data;
return split_channels_->low_channel(channel);
}
int16_t* AudioBuffer::high_pass_split_data(int channel) const {
assert(channel >= 0 && channel < num_channels_);
assert(channel >= 0 && channel < num_proc_channels_);
if (split_channels_.get() == NULL) {
return NULL;
}
return split_channels_[channel].high_pass_data;
return split_channels_->high_channel(channel);
}
int16_t* AudioBuffer::mixed_data(int channel) const {
assert(channel >= 0 && channel < num_mixed_channels_);
return mixed_channels_[channel].data;
return mixed_channels_->channel(channel);
}
int16_t* AudioBuffer::mixed_low_pass_data(int channel) const {
assert(channel >= 0 && channel < num_mixed_low_pass_channels_);
return mixed_low_pass_channels_[channel].data;
return mixed_low_pass_channels_->channel(channel);
}
int16_t* AudioBuffer::low_pass_reference(int channel) const {
assert(channel >= 0 && channel < num_channels_);
assert(channel >= 0 && channel < num_proc_channels_);
if (!reference_copied_) {
return NULL;
}
return low_pass_reference_channels_[channel].data;
return low_pass_reference_channels_->channel(channel);
}
int32_t* AudioBuffer::analysis_filter_state1(int channel) const {
assert(channel >= 0 && channel < num_channels_);
return split_channels_[channel].analysis_filter_state1;
}
int32_t* AudioBuffer::analysis_filter_state2(int channel) const {
assert(channel >= 0 && channel < num_channels_);
return split_channels_[channel].analysis_filter_state2;
}
int32_t* AudioBuffer::synthesis_filter_state1(int channel) const {
assert(channel >= 0 && channel < num_channels_);
return split_channels_[channel].synthesis_filter_state1;
}
int32_t* AudioBuffer::synthesis_filter_state2(int channel) const {
assert(channel >= 0 && channel < num_channels_);
return split_channels_[channel].synthesis_filter_state2;
SplitFilterStates* AudioBuffer::filter_states(int channel) const {
assert(channel >= 0 && channel < num_proc_channels_);
return &filter_states_[channel];
}
void AudioBuffer::set_activity(AudioFrame::VADActivity activity) {
@@ -182,11 +258,11 @@ bool AudioBuffer::is_muted() const {
}
int AudioBuffer::num_channels() const {
return num_channels_;
return num_proc_channels_;
}
int AudioBuffer::samples_per_channel() const {
return samples_per_channel_;
return proc_samples_per_channel_;
}
int AudioBuffer::samples_per_split_channel() const {
@@ -195,46 +271,49 @@ int AudioBuffer::samples_per_split_channel() const {
// TODO(andrew): Do deinterleaving and mixing in one step?
void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
assert(frame->num_channels_ <= max_num_channels_);
assert(frame->samples_per_channel_ == samples_per_channel_);
InitForNewData(frame->num_channels_);
assert(proc_samples_per_channel_ == input_samples_per_channel_);
assert(num_proc_channels_ == num_input_channels_);
assert(frame->num_channels_ == num_proc_channels_);
assert(frame->samples_per_channel_ == proc_samples_per_channel_);
InitForNewData();
activity_ = frame->vad_activity_;
if (frame->energy_ == 0) {
is_muted_ = true;
}
if (num_channels_ == 1) {
if (num_proc_channels_ == 1) {
// We can get away with a pointer assignment in this case.
data_ = frame->data_;
return;
}
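// e.g. for stereo (illustrative): interleaved [L0 R0 L1 R1 ...] becomes
// channel 0 = [L0 L1 ...] and channel 1 = [R0 R1 ...].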
int16_t* interleaved = frame->data_;
for (int i = 0; i < num_channels_; i++) {
int16_t* deinterleaved = channels_[i].data;
for (int i = 0; i < num_proc_channels_; i++) {
int16_t* deinterleaved = channels_->channel(i);
int interleaved_idx = i;
for (int j = 0; j < samples_per_channel_; j++) {
for (int j = 0; j < proc_samples_per_channel_; j++) {
deinterleaved[j] = interleaved[interleaved_idx];
interleaved_idx += num_channels_;
interleaved_idx += num_proc_channels_;
}
}
}
void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) const {
assert(frame->num_channels_ == num_channels_);
assert(frame->samples_per_channel_ == samples_per_channel_);
assert(proc_samples_per_channel_ == output_samples_per_channel_);
assert(num_proc_channels_ == num_input_channels_);
assert(frame->num_channels_ == num_proc_channels_);
assert(frame->samples_per_channel_ == proc_samples_per_channel_);
frame->vad_activity_ = activity_;
if (!data_changed) {
return;
}
if (num_channels_ == 1) {
if (num_proc_channels_ == 1) {
if (data_was_mixed_) {
memcpy(frame->data_,
channels_[0].data,
sizeof(int16_t) * samples_per_channel_);
channels_->channel(0),
sizeof(int16_t) * proc_samples_per_channel_);
} else {
// These should point to the same buffer in this case.
assert(data_ == frame->data_);
@@ -244,74 +323,47 @@ void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) const {
}
int16_t* interleaved = frame->data_;
for (int i = 0; i < num_channels_; i++) {
int16_t* deinterleaved = channels_[i].data;
for (int i = 0; i < num_proc_channels_; i++) {
int16_t* deinterleaved = channels_->channel(i);
int interleaved_idx = i;
for (int j = 0; j < samples_per_channel_; j++) {
for (int j = 0; j < proc_samples_per_channel_; j++) {
interleaved[interleaved_idx] = deinterleaved[j];
interleaved_idx += num_channels_;
interleaved_idx += num_proc_channels_;
}
}
}
void AudioBuffer::CopyFrom(const float* const* data, int samples_per_channel,
int num_channels) {
assert(num_channels <= max_num_channels_);
assert(samples_per_channel == samples_per_channel_);
InitForNewData(num_channels);
for (int i = 0; i < num_channels_; ++i) {
ScaleAndRoundToInt16(data[i], samples_per_channel, channels_[i].data);
}
}
void AudioBuffer::CopyTo(int samples_per_channel, int num_channels,
float* const* data) const {
assert(num_channels == num_channels_);
assert(samples_per_channel == samples_per_channel_);
for (int i = 0; i < num_channels_; ++i) {
ScaleToFloat(channels_[i].data, samples_per_channel, data[i]);
}
}
// TODO(andrew): would be good to support the no-mix case with pointer
// assignment.
// TODO(andrew): handle mixing to multiple channels?
void AudioBuffer::Mix(int num_mixed_channels) {
// We currently only support the stereo to mono case.
assert(num_channels_ == 2);
assert(num_mixed_channels == 1);
StereoToMono(channels_[0].data,
channels_[1].data,
channels_[0].data,
samples_per_channel_);
num_channels_ = num_mixed_channels;
data_was_mixed_ = true;
}
void AudioBuffer::CopyAndMix(int num_mixed_channels) {
// We currently only support the stereo to mono case.
assert(num_channels_ == 2);
assert(num_proc_channels_ == 2);
assert(num_mixed_channels == 1);
if (!mixed_channels_.get()) {
mixed_channels_.reset(
new ChannelBuffer<int16_t>(proc_samples_per_channel_,
num_mixed_channels));
}
StereoToMono(channels_[0].data,
channels_[1].data,
mixed_channels_[0].data,
samples_per_channel_);
StereoToMono(channels_->channel(0),
channels_->channel(1),
mixed_channels_->channel(0),
proc_samples_per_channel_);
num_mixed_channels_ = num_mixed_channels;
}
void AudioBuffer::CopyAndMixLowPass(int num_mixed_channels) {
// We currently only support the stereo to mono case.
assert(num_channels_ == 2);
assert(num_proc_channels_ == 2);
assert(num_mixed_channels == 1);
if (!mixed_low_pass_channels_.get()) {
mixed_low_pass_channels_.reset(
new ChannelBuffer<int16_t>(samples_per_split_channel_,
num_mixed_channels));
}
StereoToMono(low_pass_split_data(0),
low_pass_split_data(1),
mixed_low_pass_channels_[0].data,
mixed_low_pass_channels_->channel(0),
samples_per_split_channel_);
num_mixed_low_pass_channels_ = num_mixed_channels;
@@ -319,10 +371,14 @@ void AudioBuffer::CopyAndMixLowPass(int num_mixed_channels) {
void AudioBuffer::CopyLowPassToReference() {
reference_copied_ = true;
for (int i = 0; i < num_channels_; i++) {
memcpy(low_pass_reference_channels_[i].data,
low_pass_split_data(i),
sizeof(int16_t) * samples_per_split_channel_);
if (!low_pass_reference_channels_.get()) {
low_pass_reference_channels_.reset(
new ChannelBuffer<int16_t>(samples_per_split_channel_,
num_proc_channels_));
}
for (int i = 0; i < num_proc_channels_; i++) {
low_pass_reference_channels_->CopyFrom(low_pass_split_data(i), i);
}
}
} // namespace webrtc

View File

@@ -8,21 +8,46 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_BUFFER_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_BUFFER_H_
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_BUFFER_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_BUFFER_H_
#include <vector>
#include "webrtc/modules/audio_processing/common.h"
#include "webrtc/modules/audio_processing/include/audio_processing.h"
#include "webrtc/modules/interface/module_common_types.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
#include "webrtc/system_wrappers/interface/scoped_vector.h"
#include "webrtc/typedefs.h"
namespace webrtc {
struct AudioChannel;
struct SplitAudioChannel;
class PushSincResampler;
class SplitChannelBuffer;
struct SplitFilterStates {
SplitFilterStates() {
memset(analysis_filter_state1, 0, sizeof(analysis_filter_state1));
memset(analysis_filter_state2, 0, sizeof(analysis_filter_state2));
memset(synthesis_filter_state1, 0, sizeof(synthesis_filter_state1));
memset(synthesis_filter_state2, 0, sizeof(synthesis_filter_state2));
}
static const int kStateSize = 6;
int analysis_filter_state1[kStateSize];
int analysis_filter_state2[kStateSize];
int synthesis_filter_state1[kStateSize];
int synthesis_filter_state2[kStateSize];
};
class AudioBuffer {
public:
AudioBuffer(int max_num_channels, int samples_per_channel);
// TODO(ajm): Switch to take ChannelLayouts.
AudioBuffer(int input_samples_per_channel,
int num_input_channels,
int process_samples_per_channel,
int num_process_channels,
int output_samples_per_channel);
virtual ~AudioBuffer();
int num_channels() const;
@@ -36,10 +61,7 @@ class AudioBuffer {
int16_t* mixed_low_pass_data(int channel) const;
int16_t* low_pass_reference(int channel) const;
int32_t* analysis_filter_state1(int channel) const;
int32_t* analysis_filter_state2(int channel) const;
int32_t* synthesis_filter_state1(int channel) const;
int32_t* synthesis_filter_state2(int channel) const;
SplitFilterStates* filter_states(int channel) const;
void set_activity(AudioFrame::VADActivity activity);
AudioFrame::VADActivity activity() const;
@@ -54,40 +76,48 @@ class AudioBuffer {
void InterleaveTo(AudioFrame* frame, bool data_changed) const;
// Use for float deinterleaved data.
void CopyFrom(const float* const* data, int samples_per_channel,
int num_channels);
void CopyTo(int samples_per_channel, int num_channels,
float* const* data) const;
void CopyFrom(const float* const* data,
int samples_per_channel,
AudioProcessing::ChannelLayout layout);
void CopyTo(int samples_per_channel,
AudioProcessing::ChannelLayout layout,
float* const* data);
void Mix(int num_mixed_channels);
void CopyAndMix(int num_mixed_channels);
void CopyAndMixLowPass(int num_mixed_channels);
void CopyLowPassToReference();
private:
// Called from DeinterleaveFrom() and CopyFrom().
void InitForNewData(int num_channels);
void InitForNewData();
const int max_num_channels_;
int num_channels_;
const int input_samples_per_channel_;
const int num_input_channels_;
const int proc_samples_per_channel_;
const int num_proc_channels_;
const int output_samples_per_channel_;
int samples_per_split_channel_;
int num_mixed_channels_;
int num_mixed_low_pass_channels_;
// Whether the original data was replaced with mixed data.
bool data_was_mixed_;
const int samples_per_channel_;
int samples_per_split_channel_;
bool reference_copied_;
AudioFrame::VADActivity activity_;
bool is_muted_;
int16_t* data_;
scoped_array<AudioChannel> channels_;
scoped_array<SplitAudioChannel> split_channels_;
scoped_array<AudioChannel> mixed_channels_;
// TODO(andrew): improve this, we don't need the full 32 kHz space here.
scoped_array<AudioChannel> mixed_low_pass_channels_;
scoped_array<AudioChannel> low_pass_reference_channels_;
scoped_ptr<ChannelBuffer<int16_t> > channels_;
scoped_ptr<SplitChannelBuffer> split_channels_;
scoped_ptr<SplitFilterStates[]> filter_states_;
scoped_ptr<ChannelBuffer<int16_t> > mixed_channels_;
scoped_ptr<ChannelBuffer<int16_t> > mixed_low_pass_channels_;
scoped_ptr<ChannelBuffer<int16_t> > low_pass_reference_channels_;
scoped_ptr<ChannelBuffer<float> > input_buffer_;
scoped_ptr<ChannelBuffer<float> > process_buffer_;
ScopedVector<PushSincResampler> input_resamplers_;
ScopedVector<PushSincResampler> output_resamplers_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_BUFFER_H_
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_BUFFER_H_

View File

@@ -54,6 +54,7 @@
'audio_buffer.h',
'audio_processing_impl.cc',
'audio_processing_impl.h',
'common.h',
'echo_cancellation_impl.cc',
'echo_cancellation_impl.h',
'echo_control_mobile_impl.cc',

View File

@@ -15,6 +15,7 @@
#include "webrtc/common_audio/include/audio_util.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_processing/audio_buffer.h"
#include "webrtc/modules/audio_processing/common.h"
#include "webrtc/modules/audio_processing/echo_cancellation_impl.h"
#include "webrtc/modules/audio_processing/echo_control_mobile_impl.h"
#include "webrtc/modules/audio_processing/gain_control_impl.h"
@@ -47,24 +48,6 @@
} while (0)
namespace webrtc {
namespace {
const int kChunkSizeMs = 10;
int ChannelsFromLayout(AudioProcessing::ChannelLayout layout) {
switch (layout) {
case AudioProcessing::kMono:
case AudioProcessing::kMonoAndKeyboard:
return 1;
case AudioProcessing::kStereo:
case AudioProcessing::kStereoAndKeyboard:
return 2;
}
assert(false);
return -1;
}
} // namespace
// Throughout webrtc, it's assumed that success is represented by zero.
COMPILE_ASSERT(AudioProcessing::kNoError == 0, no_error_must_be_zero);
@@ -97,24 +80,19 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config)
noise_suppression_(NULL),
voice_detection_(NULL),
crit_(CriticalSectionWrapper::CreateCriticalSection()),
render_audio_(NULL),
capture_audio_(NULL),
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
debug_file_(FileWrapper::Create()),
event_msg_(new audioproc::Event()),
#endif
sample_rate_hz_(kSampleRate16kHz),
reverse_sample_rate_hz_(kSampleRate16kHz),
split_sample_rate_hz_(kSampleRate16kHz),
samples_per_channel_(kChunkSizeMs * sample_rate_hz_ / 1000),
reverse_samples_per_channel_(
kChunkSizeMs * reverse_sample_rate_hz_ / 1000),
fwd_in_format_(kSampleRate16kHz, 1),
fwd_proc_format_(kSampleRate16kHz, 1),
fwd_out_format_(kSampleRate16kHz),
rev_in_format_(kSampleRate16kHz, 1),
rev_proc_format_(kSampleRate16kHz, 1),
split_rate_(kSampleRate16kHz),
stream_delay_ms_(0),
delay_offset_ms_(0),
was_stream_delay_set_(false),
num_reverse_channels_(1),
num_input_channels_(1),
num_output_channels_(1),
output_will_be_muted_(false),
key_pressed_(false) {
echo_cancellation_ = new EchoCancellationImpl(this, crit_);
@@ -156,59 +134,52 @@ AudioProcessingImpl::~AudioProcessingImpl() {
debug_file_->CloseFile();
}
#endif
if (render_audio_) {
delete render_audio_;
render_audio_ = NULL;
}
if (capture_audio_) {
delete capture_audio_;
capture_audio_ = NULL;
}
}
delete crit_;
crit_ = NULL;
}
int AudioProcessingImpl::split_sample_rate_hz() const {
return split_sample_rate_hz_;
}
int AudioProcessingImpl::Initialize() {
CriticalSectionScoped crit_scoped(crit_);
return InitializeLocked();
}
int AudioProcessingImpl::Initialize(int sample_rate_hz,
int reverse_sample_rate_hz,
int num_input_channels,
int num_output_channels,
int num_reverse_channels) {
int AudioProcessingImpl::set_sample_rate_hz(int rate) {
CriticalSectionScoped crit_scoped(crit_);
return InitializeLocked(sample_rate_hz,
return InitializeLocked(rate,
rate,
rev_in_format_.rate(),
fwd_in_format_.num_channels(),
fwd_proc_format_.num_channels(),
rev_in_format_.num_channels());
}
int AudioProcessingImpl::Initialize(int input_sample_rate_hz,
int output_sample_rate_hz,
int reverse_sample_rate_hz,
ChannelLayout input_layout,
ChannelLayout output_layout,
ChannelLayout reverse_layout) {
CriticalSectionScoped crit_scoped(crit_);
return InitializeLocked(input_sample_rate_hz,
output_sample_rate_hz,
reverse_sample_rate_hz,
num_input_channels,
num_output_channels,
num_reverse_channels);
ChannelsFromLayout(input_layout),
ChannelsFromLayout(output_layout),
ChannelsFromLayout(reverse_layout));
}
int AudioProcessingImpl::InitializeLocked() {
if (render_audio_ != NULL) {
delete render_audio_;
render_audio_ = NULL;
}
if (capture_audio_ != NULL) {
delete capture_audio_;
capture_audio_ = NULL;
}
render_audio_ = new AudioBuffer(num_reverse_channels_,
reverse_samples_per_channel_);
capture_audio_ = new AudioBuffer(num_input_channels_,
samples_per_channel_);
render_audio_.reset(new AudioBuffer(rev_in_format_.samples_per_channel(),
rev_in_format_.num_channels(),
rev_proc_format_.samples_per_channel(),
rev_proc_format_.num_channels(),
rev_proc_format_.samples_per_channel()));
capture_audio_.reset(new AudioBuffer(fwd_in_format_.samples_per_channel(),
fwd_in_format_.num_channels(),
fwd_proc_format_.samples_per_channel(),
fwd_proc_format_.num_channels(),
fwd_out_format_.samples_per_channel()));
// Initialize all components.
std::list<ProcessingComponent*>::iterator it;
@@ -231,24 +202,15 @@ int AudioProcessingImpl::InitializeLocked() {
return kNoError;
}
int AudioProcessingImpl::InitializeLocked(int sample_rate_hz,
int AudioProcessingImpl::InitializeLocked(int input_sample_rate_hz,
int output_sample_rate_hz,
int reverse_sample_rate_hz,
int num_input_channels,
int num_output_channels,
int num_reverse_channels) {
if (sample_rate_hz != kSampleRate8kHz &&
sample_rate_hz != kSampleRate16kHz &&
sample_rate_hz != kSampleRate32kHz) {
return kBadSampleRateError;
}
if (reverse_sample_rate_hz != kSampleRate8kHz &&
reverse_sample_rate_hz != kSampleRate16kHz &&
reverse_sample_rate_hz != kSampleRate32kHz) {
return kBadSampleRateError;
}
// TODO(ajm): The reverse sample rate is constrained to be identical to the
// forward rate for now.
if (reverse_sample_rate_hz != sample_rate_hz) {
if (input_sample_rate_hz <= 0 ||
output_sample_rate_hz <= 0 ||
reverse_sample_rate_hz <= 0) {
return kBadSampleRateError;
}
if (num_output_channels > num_input_channels) {
@@ -260,23 +222,50 @@ int AudioProcessingImpl::InitializeLocked(int sample_rate_hz,
num_reverse_channels > 2 || num_reverse_channels < 1) {
return kBadNumberChannelsError;
}
if (echo_control_mobile_->is_enabled() && sample_rate_hz > kSampleRate16kHz) {
LOG(LS_ERROR) << "AECM only supports 16 or 8 kHz sample rates";
return kUnsupportedComponentError;
fwd_in_format_.set(input_sample_rate_hz, num_input_channels);
fwd_out_format_.set(output_sample_rate_hz);
rev_in_format_.set(reverse_sample_rate_hz, num_reverse_channels);
// We process at the closest native rate >= min(input rate, output rate)...
int min_proc_rate = std::min(fwd_in_format_.rate(), fwd_out_format_.rate());
int fwd_proc_rate;
if (min_proc_rate > kSampleRate16kHz) {
fwd_proc_rate = kSampleRate32kHz;
} else if (min_proc_rate > kSampleRate8kHz) {
fwd_proc_rate = kSampleRate16kHz;
} else {
fwd_proc_rate = kSampleRate8kHz;
}
// ...with one exception.
if (echo_control_mobile_->is_enabled() && min_proc_rate > kSampleRate16kHz) {
fwd_proc_rate = kSampleRate16kHz;
}
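// Illustrative examples: 48 kHz in / 44.1 kHz out -> min_proc_rate 44100,
// processing at 32 kHz; 16 kHz in / 48 kHz out -> min_proc_rate 16000,
// processing at 16 kHz. With AECM enabled, the first case is capped at
// 16 kHz.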
sample_rate_hz_ = sample_rate_hz;
reverse_sample_rate_hz_ = reverse_sample_rate_hz;
reverse_samples_per_channel_ = kChunkSizeMs * reverse_sample_rate_hz / 1000;
samples_per_channel_ = kChunkSizeMs * sample_rate_hz / 1000;
num_input_channels_ = num_input_channels;
num_output_channels_ = num_output_channels;
num_reverse_channels_ = num_reverse_channels;
fwd_proc_format_.set(fwd_proc_rate, num_output_channels);
if (sample_rate_hz_ == kSampleRate32kHz) {
split_sample_rate_hz_ = kSampleRate16kHz;
// We normally process the reverse stream at 16 kHz. Unless...
int rev_proc_rate = kSampleRate16kHz;
if (fwd_proc_format_.rate() == kSampleRate8kHz) {
// ...the forward stream is at 8 kHz.
rev_proc_rate = kSampleRate8kHz;
} else {
split_sample_rate_hz_ = sample_rate_hz_;
if (rev_in_format_.rate() == kSampleRate32kHz) {
// ...or the input is at 32 kHz, in which case we use the splitting
// filter rather than the resampler.
rev_proc_rate = kSampleRate32kHz;
}
}
// TODO(ajm): Enable this.
// Always downmix the reverse stream to mono for analysis.
//rev_proc_format_.set(rev_proc_rate, 1);
rev_proc_format_.set(rev_proc_rate, rev_in_format_.num_channels());
if (fwd_proc_format_.rate() == kSampleRate32kHz) {
split_rate_ = kSampleRate16kHz;
} else {
split_rate_ = fwd_proc_format_.rate();
}
return InitializeLocked();
@@ -284,20 +273,23 @@ int AudioProcessingImpl::InitializeLocked(int sample_rate_hz,
// Calls InitializeLocked() if any of the audio parameters have changed from
// their current values.
int AudioProcessingImpl::MaybeInitializeLocked(int sample_rate_hz,
int AudioProcessingImpl::MaybeInitializeLocked(int input_sample_rate_hz,
int output_sample_rate_hz,
int reverse_sample_rate_hz,
int num_input_channels,
int num_output_channels,
int num_reverse_channels) {
if (sample_rate_hz == sample_rate_hz_ &&
reverse_sample_rate_hz == reverse_sample_rate_hz_ &&
num_input_channels == num_input_channels_ &&
num_output_channels == num_output_channels_ &&
num_reverse_channels == num_reverse_channels_) {
if (input_sample_rate_hz == fwd_in_format_.rate() &&
output_sample_rate_hz == fwd_out_format_.rate() &&
reverse_sample_rate_hz == rev_in_format_.rate() &&
num_input_channels == fwd_in_format_.num_channels() &&
num_output_channels == fwd_proc_format_.num_channels() &&
num_reverse_channels == rev_in_format_.num_channels()) {
return kNoError;
}
return InitializeLocked(sample_rate_hz,
return InitializeLocked(input_sample_rate_hz,
output_sample_rate_hz,
reverse_sample_rate_hz,
num_input_channels,
num_output_channels,
@@ -315,86 +307,29 @@ int AudioProcessingImpl::EnableExperimentalNs(bool enable) {
return kNoError;
}
int AudioProcessingImpl::set_sample_rate_hz(int rate) {
int AudioProcessingImpl::input_sample_rate_hz() const {
CriticalSectionScoped crit_scoped(crit_);
if (rate == sample_rate_hz_) {
return kNoError;
}
if (rate != kSampleRate8kHz &&
rate != kSampleRate16kHz &&
rate != kSampleRate32kHz) {
return kBadParameterError;
}
if (echo_control_mobile_->is_enabled() && rate > kSampleRate16kHz) {
LOG(LS_ERROR) << "AECM only supports 16 kHz or lower sample rates";
return kUnsupportedComponentError;
}
sample_rate_hz_ = rate;
samples_per_channel_ = rate / 100;
if (sample_rate_hz_ == kSampleRate32kHz) {
split_sample_rate_hz_ = kSampleRate16kHz;
} else {
split_sample_rate_hz_ = sample_rate_hz_;
}
return InitializeLocked();
return fwd_in_format_.rate();
}
int AudioProcessingImpl::sample_rate_hz() const {
CriticalSectionScoped crit_scoped(crit_);
return sample_rate_hz_;
int AudioProcessingImpl::proc_sample_rate_hz() const {
return fwd_proc_format_.rate();
}
int AudioProcessingImpl::set_num_reverse_channels(int channels) {
CriticalSectionScoped crit_scoped(crit_);
if (channels == num_reverse_channels_) {
return kNoError;
}
// Only stereo supported currently.
if (channels > 2 || channels < 1) {
return kBadParameterError;
}
num_reverse_channels_ = channels;
return InitializeLocked();
int AudioProcessingImpl::proc_split_sample_rate_hz() const {
return split_rate_;
}
int AudioProcessingImpl::num_reverse_channels() const {
return num_reverse_channels_;
}
int AudioProcessingImpl::set_num_channels(
int input_channels,
int output_channels) {
CriticalSectionScoped crit_scoped(crit_);
if (input_channels == num_input_channels_ &&
output_channels == num_output_channels_) {
return kNoError;
}
if (output_channels > input_channels) {
return kBadParameterError;
}
// Only stereo supported currently.
if (input_channels > 2 || input_channels < 1 ||
output_channels > 2 || output_channels < 1) {
return kBadParameterError;
}
num_input_channels_ = input_channels;
num_output_channels_ = output_channels;
return InitializeLocked();
return rev_proc_format_.num_channels();
}
int AudioProcessingImpl::num_input_channels() const {
return num_input_channels_;
return fwd_in_format_.num_channels();
}
int AudioProcessingImpl::num_output_channels() const {
return num_output_channels_;
return fwd_proc_format_.num_channels();
}
void AudioProcessingImpl::set_output_will_be_muted(bool muted) {
@@ -405,24 +340,25 @@ bool AudioProcessingImpl::output_will_be_muted() const {
return output_will_be_muted_;
}
int AudioProcessingImpl::ProcessStream(float* const* data,
int AudioProcessingImpl::ProcessStream(const float* const* src,
int samples_per_channel,
int sample_rate_hz,
int input_sample_rate_hz,
ChannelLayout input_layout,
ChannelLayout output_layout) {
int output_sample_rate_hz,
ChannelLayout output_layout,
float* const* dest) {
CriticalSectionScoped crit_scoped(crit_);
if (!data) {
if (!src || !dest) {
return kNullPointerError;
}
const int num_input_channels = ChannelsFromLayout(input_layout);
// TODO(ajm): We now always set the output channels equal to the input
// channels here. Restore the ability to downmix.
// TODO(ajm): The reverse sample rate is constrained to be identical to the
// forward rate for now.
RETURN_ON_ERR(MaybeInitializeLocked(sample_rate_hz, sample_rate_hz,
num_input_channels, num_input_channels, num_reverse_channels_));
if (samples_per_channel != samples_per_channel_) {
RETURN_ON_ERR(MaybeInitializeLocked(input_sample_rate_hz,
output_sample_rate_hz,
rev_in_format_.rate(),
ChannelsFromLayout(input_layout),
ChannelsFromLayout(output_layout),
rev_in_format_.num_channels()));
if (samples_per_channel != fwd_in_format_.samples_per_channel()) {
return kBadDataLengthError;
}
@@ -431,23 +367,25 @@ int AudioProcessingImpl::ProcessStream(float* const* data,
event_msg_->set_type(audioproc::Event::STREAM);
audioproc::Stream* msg = event_msg_->mutable_stream();
const size_t channel_size = sizeof(float) * samples_per_channel;
for (int i = 0; i < num_input_channels; ++i)
msg->add_input_channel(data[i], channel_size);
for (int i = 0; i < fwd_in_format_.num_channels(); ++i)
msg->add_input_channel(src[i], channel_size);
}
#endif
capture_audio_->CopyFrom(data, samples_per_channel, num_output_channels_);
capture_audio_->CopyFrom(src, samples_per_channel, input_layout);
RETURN_ON_ERR(ProcessStreamLocked());
if (output_copy_needed(is_data_processed())) {
capture_audio_->CopyTo(samples_per_channel, num_output_channels_, data);
capture_audio_->CopyTo(fwd_out_format_.samples_per_channel(),
output_layout,
dest);
}
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
if (debug_file_->Open()) {
audioproc::Stream* msg = event_msg_->mutable_stream();
const size_t channel_size = sizeof(float) * samples_per_channel;
for (int i = 0; i < num_output_channels_; ++i)
msg->add_output_channel(data[i], channel_size);
for (int i = 0; i < fwd_proc_format_.num_channels(); ++i)
msg->add_output_channel(dest[i], channel_size);
RETURN_ON_ERR(WriteMessageToDebugFile());
}
#endif
@@ -460,15 +398,27 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
if (!frame) {
return kNullPointerError;
}
// Must be a native rate.
if (frame->sample_rate_hz_ != kSampleRate8kHz &&
frame->sample_rate_hz_ != kSampleRate16kHz &&
frame->sample_rate_hz_ != kSampleRate32kHz) {
return kBadSampleRateError;
}
if (echo_control_mobile_->is_enabled() &&
frame->sample_rate_hz_ > kSampleRate16kHz) {
LOG(LS_ERROR) << "AECM only supports 16 or 8 kHz sample rates";
return kUnsupportedComponentError;
}
// TODO(ajm): We now always set the output channels equal to the input
// channels here. Restore the ability to downmix.
// TODO(ajm): The reverse sample rate is constrained to be identical to the
// forward rate for now.
// TODO(ajm): The input and output rates and channels are currently
// constrained to be identical in the int16 interface.
RETURN_ON_ERR(MaybeInitializeLocked(frame->sample_rate_hz_,
frame->sample_rate_hz_, frame->num_channels_, frame->num_channels_,
num_reverse_channels_));
if (frame->samples_per_channel_ != samples_per_channel_) {
frame->sample_rate_hz_,
rev_in_format_.rate(),
frame->num_channels_,
frame->num_channels_,
rev_in_format_.num_channels()));
if (frame->samples_per_channel_ != fwd_in_format_.samples_per_channel()) {
return kBadDataLengthError;
}
@@ -484,10 +434,6 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
#endif
capture_audio_->DeinterleaveFrom(frame);
if (num_output_channels_ < num_input_channels_) {
capture_audio_->Mix(num_output_channels_);
frame->num_channels_ = num_output_channels_;
}
RETURN_ON_ERR(ProcessStreamLocked());
capture_audio_->InterleaveTo(frame, output_copy_needed(is_data_processed()));
@@ -519,44 +465,46 @@ int AudioProcessingImpl::ProcessStreamLocked() {
bool data_processed = is_data_processed();
if (analysis_needed(data_processed)) {
for (int i = 0; i < num_output_channels_; i++) {
for (int i = 0; i < fwd_proc_format_.num_channels(); i++) {
SplitFilterStates* filter_states = capture_audio_->filter_states(i);
// Split into a low and high band.
WebRtcSpl_AnalysisQMF(capture_audio_->data(i),
capture_audio_->samples_per_channel(),
capture_audio_->low_pass_split_data(i),
capture_audio_->high_pass_split_data(i),
capture_audio_->analysis_filter_state1(i),
capture_audio_->analysis_filter_state2(i));
filter_states->analysis_filter_state1,
filter_states->analysis_filter_state2);
}
}
RETURN_ON_ERR(high_pass_filter_->ProcessCaptureAudio(capture_audio_));
RETURN_ON_ERR(gain_control_->AnalyzeCaptureAudio(capture_audio_));
RETURN_ON_ERR(echo_cancellation_->ProcessCaptureAudio(capture_audio_));
RETURN_ON_ERR(high_pass_filter_->ProcessCaptureAudio(capture_audio_.get()));
RETURN_ON_ERR(gain_control_->AnalyzeCaptureAudio(capture_audio_.get()));
RETURN_ON_ERR(echo_cancellation_->ProcessCaptureAudio(capture_audio_.get()));
if (echo_control_mobile_->is_enabled() &&
noise_suppression_->is_enabled()) {
if (echo_control_mobile_->is_enabled() && noise_suppression_->is_enabled()) {
capture_audio_->CopyLowPassToReference();
}
RETURN_ON_ERR(noise_suppression_->ProcessCaptureAudio(capture_audio_));
RETURN_ON_ERR(echo_control_mobile_->ProcessCaptureAudio(capture_audio_));
RETURN_ON_ERR(voice_detection_->ProcessCaptureAudio(capture_audio_));
RETURN_ON_ERR(gain_control_->ProcessCaptureAudio(capture_audio_));
RETURN_ON_ERR(noise_suppression_->ProcessCaptureAudio(capture_audio_.get()));
RETURN_ON_ERR(
echo_control_mobile_->ProcessCaptureAudio(capture_audio_.get()));
RETURN_ON_ERR(voice_detection_->ProcessCaptureAudio(capture_audio_.get()));
RETURN_ON_ERR(gain_control_->ProcessCaptureAudio(capture_audio_.get()));
if (synthesis_needed(data_processed)) {
for (int i = 0; i < num_output_channels_; i++) {
for (int i = 0; i < fwd_proc_format_.num_channels(); i++) {
// Recombine low and high bands.
SplitFilterStates* filter_states = capture_audio_->filter_states(i);
WebRtcSpl_SynthesisQMF(capture_audio_->low_pass_split_data(i),
capture_audio_->high_pass_split_data(i),
capture_audio_->samples_per_split_channel(),
capture_audio_->data(i),
capture_audio_->synthesis_filter_state1(i),
capture_audio_->synthesis_filter_state2(i));
filter_states->synthesis_filter_state1,
filter_states->synthesis_filter_state2);
}
}
// The level estimator operates on the recombined data.
RETURN_ON_ERR(level_estimator_->ProcessStream(capture_audio_));
RETURN_ON_ERR(level_estimator_->ProcessStream(capture_audio_.get()));
was_stream_delay_set_ = false;
return kNoError;
@@ -570,16 +518,15 @@ int AudioProcessingImpl::AnalyzeReverseStream(const float* const* data,
if (data == NULL) {
return kNullPointerError;
}
if (sample_rate_hz != sample_rate_hz_) {
return kBadSampleRateError;
}
const int num_channels = ChannelsFromLayout(layout);
// TODO(ajm): The reverse sample rate is constrained to be identical to the
// forward rate for now.
RETURN_ON_ERR(MaybeInitializeLocked(sample_rate_hz_, sample_rate_hz_,
num_input_channels_, num_output_channels_, num_channels));
if (samples_per_channel != reverse_samples_per_channel_) {
RETURN_ON_ERR(MaybeInitializeLocked(fwd_in_format_.rate(),
fwd_out_format_.rate(),
sample_rate_hz,
fwd_in_format_.num_channels(),
fwd_proc_format_.num_channels(),
num_channels));
if (samples_per_channel != rev_in_format_.samples_per_channel()) {
return kBadDataLengthError;
}
@@ -594,7 +541,7 @@
}
#endif
render_audio_->CopyFrom(data, samples_per_channel, num_channels);
render_audio_->CopyFrom(data, samples_per_channel, layout);
return AnalyzeReverseStreamLocked();
}
@@ -603,15 +550,24 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
if (frame == NULL) {
return kNullPointerError;
}
if (frame->sample_rate_hz_ != sample_rate_hz_) {
// Must be a native rate.
if (frame->sample_rate_hz_ != kSampleRate8kHz &&
frame->sample_rate_hz_ != kSampleRate16kHz &&
frame->sample_rate_hz_ != kSampleRate32kHz) {
return kBadSampleRateError;
}
// This interface does not tolerate different forward and reverse rates.
if (frame->sample_rate_hz_ != fwd_in_format_.rate()) {
return kBadSampleRateError;
}
// TODO(ajm): The reverse sample rate is constrained to be identical to the
// forward rate for now.
RETURN_ON_ERR(MaybeInitializeLocked(sample_rate_hz_, sample_rate_hz_,
num_input_channels_, num_output_channels_, frame->num_channels_));
if (frame->samples_per_channel_ != reverse_samples_per_channel_) {
RETURN_ON_ERR(MaybeInitializeLocked(fwd_in_format_.rate(),
fwd_out_format_.rate(),
frame->sample_rate_hz_,
fwd_in_format_.num_channels(),
fwd_in_format_.num_channels(),
frame->num_channels_));
if (frame->samples_per_channel_ != rev_in_format_.samples_per_channel()) {
return kBadDataLengthError;
}
@@ -636,21 +592,22 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
// We can be smarter and use the splitting filter when appropriate. Similarly,
// perform downmixing here.
int AudioProcessingImpl::AnalyzeReverseStreamLocked() {
if (sample_rate_hz_ == kSampleRate32kHz) {
for (int i = 0; i < num_reverse_channels_; i++) {
if (rev_proc_format_.rate() == kSampleRate32kHz) {
for (int i = 0; i < rev_proc_format_.num_channels(); i++) {
// Split into low and high band.
SplitFilterStates* filter_states = render_audio_->filter_states(i);
WebRtcSpl_AnalysisQMF(render_audio_->data(i),
render_audio_->samples_per_channel(),
render_audio_->low_pass_split_data(i),
render_audio_->high_pass_split_data(i),
render_audio_->analysis_filter_state1(i),
render_audio_->analysis_filter_state2(i));
filter_states->analysis_filter_state1,
filter_states->analysis_filter_state2);
}
}
RETURN_ON_ERR(echo_cancellation_->ProcessRenderAudio(render_audio_));
RETURN_ON_ERR(echo_control_mobile_->ProcessRenderAudio(render_audio_));
RETURN_ON_ERR(gain_control_->ProcessRenderAudio(render_audio_));
RETURN_ON_ERR(echo_cancellation_->ProcessRenderAudio(render_audio_.get()));
RETURN_ON_ERR(echo_control_mobile_->ProcessRenderAudio(render_audio_.get()));
RETURN_ON_ERR(gain_control_->ProcessRenderAudio(render_audio_.get()));
return kNoError;
}
@@ -832,18 +789,19 @@ bool AudioProcessingImpl::is_data_processed() const {
bool AudioProcessingImpl::output_copy_needed(bool is_data_processed) const {
// Check if we've upmixed or downmixed the audio.
return (num_output_channels_ != num_input_channels_ || is_data_processed);
return ((fwd_proc_format_.num_channels() != fwd_in_format_.num_channels()) ||
is_data_processed);
}
bool AudioProcessingImpl::synthesis_needed(bool is_data_processed) const {
return (is_data_processed && sample_rate_hz_ == kSampleRate32kHz);
return (is_data_processed && fwd_proc_format_.rate() == kSampleRate32kHz);
}
bool AudioProcessingImpl::analysis_needed(bool is_data_processed) const {
if (!is_data_processed && !voice_detection_->is_enabled()) {
// Only level_estimator_ is enabled.
return false;
} else if (sample_rate_hz_ == kSampleRate32kHz) {
} else if (fwd_proc_format_.rate() == kSampleRate32kHz) {
// Something besides level_estimator_ is enabled, and we have super-wb.
return true;
}
@@ -881,12 +839,12 @@ int AudioProcessingImpl::WriteMessageToDebugFile() {
int AudioProcessingImpl::WriteInitMessage() {
event_msg_->set_type(audioproc::Event::INIT);
audioproc::Init* msg = event_msg_->mutable_init();
msg->set_sample_rate(sample_rate_hz_);
msg->set_device_sample_rate(echo_cancellation_->device_sample_rate_hz());
msg->set_num_input_channels(num_input_channels_);
msg->set_num_output_channels(num_output_channels_);
msg->set_num_reverse_channels(num_reverse_channels_);
msg->set_reverse_sample_rate(reverse_sample_rate_hz_);
msg->set_sample_rate(fwd_in_format_.rate());
msg->set_num_input_channels(fwd_in_format_.num_channels());
msg->set_num_output_channels(fwd_proc_format_.num_channels());
msg->set_num_reverse_channels(rev_in_format_.num_channels());
msg->set_reverse_sample_rate(rev_in_format_.rate());
msg->set_output_sample_rate(fwd_out_format_.rate());
int err = WriteMessageToDebugFile();
if (err != kNoError) {
@@ -896,4 +854,5 @@ int AudioProcessingImpl::WriteInitMessage() {
return kNoError;
}
#endif // WEBRTC_AUDIOPROC_DEBUG_DUMP
} // namespace webrtc

View File

@@ -19,6 +19,7 @@
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
namespace webrtc {
class AudioBuffer;
class CriticalSectionWrapper;
class EchoCancellationImpl;
@@ -39,6 +40,44 @@ class Event;
} // namespace audioproc
#endif
class AudioRate {
public:
explicit AudioRate(int sample_rate_hz)
: rate_(sample_rate_hz),
samples_per_channel_(AudioProcessing::kChunkSizeMs * rate_ / 1000) {}
virtual ~AudioRate() {}
void set(int rate) {
rate_ = rate;
samples_per_channel_ = AudioProcessing::kChunkSizeMs * rate_ / 1000;
}
int rate() const { return rate_; }
int samples_per_channel() const { return samples_per_channel_; }
private:
int rate_;
int samples_per_channel_;
};
class AudioFormat : public AudioRate {
public:
AudioFormat(int sample_rate_hz, int num_channels)
: AudioRate(sample_rate_hz),
num_channels_(num_channels) {}
virtual ~AudioFormat() {}
void set(int rate, int num_channels) {
AudioRate::set(rate);
num_channels_ = num_channels;
}
int num_channels() const { return num_channels_; }
private:
int num_channels_;
};
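// Example (illustrative): AudioFormat(48000, 2) describes 10 ms stereo
// chunks of 480 samples per channel; set(16000, 1) reconfigures it for
// 160-sample mono chunks.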
class AudioProcessingImpl : public AudioProcessing {
public:
explicit AudioProcessingImpl(const Config& config);
@@ -46,33 +85,34 @@ class AudioProcessingImpl : public AudioProcessing {
// AudioProcessing methods.
virtual int Initialize() OVERRIDE;
virtual int Initialize(int sample_rate_hz,
virtual int Initialize(int input_sample_rate_hz,
int output_sample_rate_hz,
int reverse_sample_rate_hz,
int num_input_channels,
int num_output_channels,
int num_reverse_channels) OVERRIDE;
ChannelLayout input_layout,
ChannelLayout output_layout,
ChannelLayout reverse_layout) OVERRIDE;
virtual void SetExtraOptions(const Config& config) OVERRIDE;
virtual int EnableExperimentalNs(bool enable) OVERRIDE;
virtual bool experimental_ns_enabled() const OVERRIDE {
return false;
}
virtual int set_sample_rate_hz(int rate) OVERRIDE;
virtual int sample_rate_hz() const OVERRIDE;
virtual int split_sample_rate_hz() const OVERRIDE;
virtual int set_num_channels(int input_channels,
int output_channels) OVERRIDE;
virtual int input_sample_rate_hz() const OVERRIDE;
virtual int proc_sample_rate_hz() const OVERRIDE;
virtual int proc_split_sample_rate_hz() const OVERRIDE;
virtual int num_input_channels() const OVERRIDE;
virtual int num_output_channels() const OVERRIDE;
virtual int set_num_reverse_channels(int channels) OVERRIDE;
virtual int num_reverse_channels() const OVERRIDE;
virtual void set_output_will_be_muted(bool muted) OVERRIDE;
virtual bool output_will_be_muted() const OVERRIDE;
virtual int ProcessStream(AudioFrame* frame) OVERRIDE;
virtual int ProcessStream(float* const* data,
virtual int ProcessStream(const float* const* src,
int samples_per_channel,
int sample_rate_hz,
int input_sample_rate_hz,
ChannelLayout input_layout,
ChannelLayout output_layout) OVERRIDE;
int output_sample_rate_hz,
ChannelLayout output_layout,
float* const* dest) OVERRIDE;
virtual int AnalyzeReverseStream(AudioFrame* frame) OVERRIDE;
virtual int AnalyzeReverseStream(const float* const* data,
int samples_per_channel,
@@ -102,12 +142,14 @@ class AudioProcessingImpl : public AudioProcessing {
virtual int InitializeLocked();
private:
int InitializeLocked(int sample_rate_hz,
int InitializeLocked(int input_sample_rate_hz,
int output_sample_rate_hz,
int reverse_sample_rate_hz,
int num_input_channels,
int num_output_channels,
int num_reverse_channels);
int MaybeInitializeLocked(int sample_rate_hz,
int MaybeInitializeLocked(int input_sample_rate_hz,
int output_sample_rate_hz,
int reverse_sample_rate_hz,
int num_input_channels,
int num_output_channels,
@@ -130,8 +172,8 @@ class AudioProcessingImpl : public AudioProcessing {
std::list<ProcessingComponent*> component_list_;
CriticalSectionWrapper* crit_;
AudioBuffer* render_audio_;
AudioBuffer* capture_audio_;
scoped_ptr<AudioBuffer> render_audio_;
scoped_ptr<AudioBuffer> capture_audio_;
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
// TODO(andrew): make this more graceful. Ideally we would split this stuff
// out into a separate class with an "enabled" and "disabled" implementation.
@@ -142,22 +184,22 @@ class AudioProcessingImpl : public AudioProcessing {
std::string event_str_; // Memory for protobuf serialization.
#endif
int sample_rate_hz_;
int reverse_sample_rate_hz_;
int split_sample_rate_hz_;
int samples_per_channel_;
int reverse_samples_per_channel_;
AudioFormat fwd_in_format_;
AudioFormat fwd_proc_format_;
AudioRate fwd_out_format_;
AudioFormat rev_in_format_;
AudioFormat rev_proc_format_;
int split_rate_;
int stream_delay_ms_;
int delay_offset_ms_;
bool was_stream_delay_set_;
int num_reverse_channels_;
int num_input_channels_;
int num_output_channels_;
bool output_will_be_muted_;
bool key_pressed_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_PROCESSING_IMPL_H_

View File

@@ -0,0 +1,75 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_COMMON_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_COMMON_H_
#include <string.h>
#include "webrtc/modules/audio_processing/include/audio_processing.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
namespace webrtc {
static inline int ChannelsFromLayout(AudioProcessing::ChannelLayout layout) {
switch (layout) {
case AudioProcessing::kMono:
case AudioProcessing::kMonoAndKeyboard:
return 1;
case AudioProcessing::kStereo:
case AudioProcessing::kStereoAndKeyboard:
return 2;
}
assert(false);
return -1;
}
// Helper to encapsulate a contiguous data buffer with access to a pointer
// array of the deinterleaved channels.
template <typename T>
class ChannelBuffer {
public:
ChannelBuffer(int samples_per_channel, int num_channels)
: data_(new T[samples_per_channel * num_channels]),
channels_(new T*[num_channels]),
samples_per_channel_(samples_per_channel),
num_channels_(num_channels) {
memset(data_.get(), 0, sizeof(T) * samples_per_channel * num_channels);
for (int i = 0; i < num_channels; ++i)
channels_[i] = &data_[i * samples_per_channel];
}
~ChannelBuffer() {}
void CopyFrom(const void* channel_ptr, int i) {
assert(i < num_channels_);
memcpy(channels_[i], channel_ptr, samples_per_channel_ * sizeof(T));
}
T* data() { return data_.get(); }
T* channel(int i) {
assert(i < num_channels_);
return channels_[i];
}
T** channels() { return channels_.get(); }
int samples_per_channel() { return samples_per_channel_; }
int num_channels() { return num_channels_; }
int length() { return samples_per_channel_ * num_channels_; }
private:
scoped_ptr<T[]> data_;
scoped_ptr<T*[]> channels_;
int samples_per_channel_;
int num_channels_;
};
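// Usage sketch (illustrative; |source| is a hypothetical sample buffer):
//   ChannelBuffer<int16_t> buf(160, 2);  // 10 ms of stereo at 16 kHz
//   buf.CopyFrom(source, 0);             // fill the first channel
//   int16_t* right = buf.channel(1);     // direct deinterleaved access
//   int16_t** all = buf.channels();      // pointer array for C-style APIs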
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_COMMON_H_

View File

@@ -4,11 +4,12 @@ package webrtc.audioproc;
message Init {
optional int32 sample_rate = 1;
optional int32 device_sample_rate = 2;
optional int32 device_sample_rate = 2 [deprecated=true];
optional int32 num_input_channels = 3;
optional int32 num_output_channels = 4;
optional int32 num_reverse_channels = 5;
optional int32 reverse_sample_rate = 6;
optional int32 output_sample_rate = 7;
}
// May contain interleaved or deinterleaved data, but don't store both formats.

View File

@@ -63,7 +63,6 @@ EchoCancellationImpl::EchoCancellationImpl(const AudioProcessing* apm,
drift_compensation_enabled_(false),
metrics_enabled_(false),
suppression_level_(kModerateSuppression),
device_sample_rate_hz_(48000),
stream_drift_samples_(0),
was_stream_drift_set_(false),
stream_has_echo_(false),
@@ -202,20 +201,6 @@ bool EchoCancellationImpl::is_drift_compensation_enabled() const {
return drift_compensation_enabled_;
}
int EchoCancellationImpl::set_device_sample_rate_hz(int rate) {
CriticalSectionScoped crit_scoped(crit_);
if (rate < 8000 || rate > 96000) {
return apm_->kBadParameterError;
}
device_sample_rate_hz_ = rate;
return Initialize();
}
int EchoCancellationImpl::device_sample_rate_hz() const {
return device_sample_rate_hz_;
}
void EchoCancellationImpl::set_stream_drift_samples(int drift) {
was_stream_drift_set_ = true;
stream_drift_samples_ = drift;
@@ -358,9 +343,12 @@ void EchoCancellationImpl::DestroyHandle(void* handle) const {
int EchoCancellationImpl::InitializeHandle(void* handle) const {
assert(handle != NULL);
// TODO(ajm): Drift compensation is disabled in practice. If restored, it
// should be managed internally and not depend on the hardware sample rate.
// For now, just hardcode a 48 kHz value.
return WebRtcAec_Init(static_cast<Handle*>(handle),
apm_->sample_rate_hz(),
device_sample_rate_hz_);
apm_->proc_sample_rate_hz(),
48000);
}
int EchoCancellationImpl::ConfigureHandle(void* handle) const {

View File

@@ -31,7 +31,6 @@ class EchoCancellationImpl : public EchoCancellation,
// EchoCancellation implementation.
virtual bool is_enabled() const OVERRIDE;
virtual int device_sample_rate_hz() const OVERRIDE;
virtual int stream_drift_samples() const OVERRIDE;
// ProcessingComponent implementation.
@@ -43,7 +42,6 @@ class EchoCancellationImpl : public EchoCancellation,
virtual int Enable(bool enable) OVERRIDE;
virtual int enable_drift_compensation(bool enable) OVERRIDE;
virtual bool is_drift_compensation_enabled() const OVERRIDE;
virtual int set_device_sample_rate_hz(int rate) OVERRIDE;
virtual void set_stream_drift_samples(int drift) OVERRIDE;
virtual int set_suppression_level(SuppressionLevel level) OVERRIDE;
virtual SuppressionLevel suppression_level() const OVERRIDE;
@@ -69,7 +67,6 @@ class EchoCancellationImpl : public EchoCancellation,
bool drift_compensation_enabled_;
bool metrics_enabled_;
SuppressionLevel suppression_level_;
int device_sample_rate_hz_;
int stream_drift_samples_;
bool was_stream_drift_set_;
bool stream_has_echo_;

View File

@@ -241,7 +241,7 @@ int EchoControlMobileImpl::Initialize() {
return apm_->kNoError;
}
if (apm_->sample_rate_hz() == apm_->kSampleRate32kHz) {
if (apm_->proc_sample_rate_hz() > apm_->kSampleRate16kHz) {
LOG(LS_ERROR) << "AECM only supports 16 kHz or lower sample rates";
return apm_->kBadSampleRateError;
}
@@ -267,7 +267,7 @@ void EchoControlMobileImpl::DestroyHandle(void* handle) const {
int EchoControlMobileImpl::InitializeHandle(void* handle) const {
assert(handle != NULL);
Handle* my_handle = static_cast<Handle*>(handle);
if (WebRtcAecm_Init(my_handle, apm_->sample_rate_hz()) != 0) {
if (WebRtcAecm_Init(my_handle, apm_->proc_sample_rate_hz()) != 0) {
return GetHandleError(my_handle);
}
if (external_echo_path_ != NULL) {

View File

@@ -326,7 +326,7 @@ int GainControlImpl::InitializeHandle(void* handle) const {
minimum_capture_level_,
maximum_capture_level_,
MapSetting(mode_),
apm_->sample_rate_hz());
apm_->proc_sample_rate_hz());
}
int GainControlImpl::ConfigureHandle(void* handle) const {

View File

@@ -154,7 +154,7 @@ void HighPassFilterImpl::DestroyHandle(void* handle) const {
int HighPassFilterImpl::InitializeHandle(void* handle) const {
return InitializeFilter(static_cast<Handle*>(handle),
apm_->sample_rate_hz());
apm_->proc_sample_rate_hz());
}
int HighPassFilterImpl::ConfigureHandle(void* /*handle*/) const {

View File

@@ -92,8 +92,9 @@ static const int kAudioProcMaxNativeSampleRateHz = 32000;
// 2. Parameter getters are never called concurrently with the corresponding
// setter.
//
// APM accepts only 16-bit linear PCM audio data in frames of 10 ms. Multiple
// channels should be interleaved.
// APM accepts only linear PCM audio data in chunks of 10 ms. The int16
// interfaces use interleaved data, while the float interfaces use deinterleaved
// data.
//
// Usage example, omitting error checking:
// AudioProcessing* apm = AudioProcessing::Create(0);
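To make the two layouts concrete, here is a rough sketch (the buffer names are illustrative, not part of the API): a stereo 10 ms chunk at 32 kHz goes to the int16 interface as a single interleaved array, and to the float interface as an array of per-channel pointers.
// Interleaved int16, as carried by AudioFrame: L0 R0 L1 R1 ...
// (320 samples per channel at 32 kHz).
int16_t interleaved[320 * 2];
// Deinterleaved float: one contiguous buffer per channel, with each
// sample in [-1, 1].
float left[320];
float right[320];
float* const deinterleaved[2] = {left, right};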
@ -162,15 +163,27 @@ class AudioProcessing {
// Initializes internal states, while retaining all user settings. This
// should be called before beginning to process a new audio stream. However,
// it is not necessary to call before processing the first stream after
// creation. It is also not necessary to call if the audio parameters (sample
// creation.
//
// It is also not necessary to call if the audio parameters (sample
// rate and number of channels) have changed. Passing updated parameters
// directly to |ProcessStream()| and |AnalyzeReverseStream()| is permissible.
// If the parameters are known at init-time though, they may be provided.
virtual int Initialize() = 0;
virtual int Initialize(int sample_rate_hz,
// The int16 interfaces require:
// - that only |NativeRate|s be used
// - that the input, output and reverse rates match
// - that |output_layout| matches |input_layout|
//
// The float interfaces accept arbitrary rates and support differing input
// and output layouts, but the output may only remove channels, not add them.
virtual int Initialize(int input_sample_rate_hz,
int output_sample_rate_hz,
int reverse_sample_rate_hz,
int num_input_channels,
int num_output_channels,
int num_reverse_channels) = 0;
ChannelLayout input_layout,
ChannelLayout output_layout,
ChannelLayout reverse_layout) = 0;
// Pass down additional options which don't have explicit setters. This
// ensures the options are applied immediately.
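As a sketch of the new Initialize() signature above (the rates here are hypothetical and error checking is omitted): a capture path recording 44.1 kHz stereo that must deliver 16 kHz mono, with a 48 kHz stereo reverse stream.
AudioProcessing* apm = AudioProcessing::Create(0);
apm->Initialize(44100,                      // input_sample_rate_hz
                16000,                      // output_sample_rate_hz
                48000,                      // reverse_sample_rate_hz
                AudioProcessing::kStereo,   // input_layout
                AudioProcessing::kMono,     // output_layout (channels removed)
                AudioProcessing::kStereo);  // reverse_layout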
@ -179,28 +192,20 @@ class AudioProcessing {
virtual int EnableExperimentalNs(bool enable) = 0;
virtual bool experimental_ns_enabled() const = 0;
// DEPRECATED: It is now possible to modify the sample rate directly in a call
// to |ProcessStream|.
// Sets the sample |rate| in Hz for both the primary and reverse audio
// streams. 8000, 16000 or 32000 Hz are permitted.
// DEPRECATED.
// TODO(ajm): Remove after Chromium has upgraded to using Initialize().
virtual int set_sample_rate_hz(int rate) = 0;
virtual int sample_rate_hz() const = 0;
virtual int split_sample_rate_hz() const = 0;
// DEPRECATED.
// TODO(ajm): Remove after voice engine no longer requires it to resample
// the reverse stream to the forward rate.
virtual int input_sample_rate_hz() const = 0;
// DEPRECATED: It is now possible to modify the number of channels directly in
// a call to |ProcessStream|.
// Sets the number of channels for the primary audio stream. Input frames must
// contain a number of channels given by |input_channels|, while output frames
// will be returned with number of channels given by |output_channels|.
virtual int set_num_channels(int input_channels, int output_channels) = 0;
// TODO(ajm): Only intended for internal use. Make private and friend the
// necessary classes?
virtual int proc_sample_rate_hz() const = 0;
virtual int proc_split_sample_rate_hz() const = 0;
virtual int num_input_channels() const = 0;
virtual int num_output_channels() const = 0;
// DEPRECATED: It is now possible to modify the number of channels directly in
// a call to |AnalyzeReverseStream|.
// Sets the number of channels for the reverse audio stream. Input frames must
// contain a number of channels given by |channels|.
virtual int set_num_reverse_channels(int channels) = 0;
virtual int num_reverse_channels() const = 0;
// Set to true when the output of AudioProcessing will be muted or in some
@ -223,15 +228,19 @@ class AudioProcessing {
virtual int ProcessStream(AudioFrame* frame) = 0;
// Accepts deinterleaved float audio with the range [-1, 1]. Each element
// of |data| points to a channel buffer, arranged according to
// of |src| points to a channel buffer, arranged according to
// |input_layout|. At output, the channels will be arranged according to
// |output_layout|.
// TODO(ajm): Output layout conversion does not yet work.
virtual int ProcessStream(float* const* data,
// |output_layout| at |output_sample_rate_hz| in |dest|.
//
// The output layout may only remove channels, not add them. |src| and |dest|
// may use the same memory, if desired.
virtual int ProcessStream(const float* const* src,
int samples_per_channel,
int sample_rate_hz,
int input_sample_rate_hz,
ChannelLayout input_layout,
ChannelLayout output_layout) = 0;
int output_sample_rate_hz,
ChannelLayout output_layout,
float* const* dest) = 0;
// Analyzes a 10 ms |frame| of the reverse direction audio stream. The frame
// will not be modified. On the client-side, this is the far-end (or to be
@ -245,7 +254,7 @@ class AudioProcessing {
//
// The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_|
// members of |frame| must be valid. |sample_rate_hz_| must correspond to
// |sample_rate_hz()|
// |input_sample_rate_hz()|
//
// TODO(ajm): add const to input; requires an implementation fix.
virtual int AnalyzeReverseStream(AudioFrame* frame) = 0;
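A matching per-chunk call to the float ProcessStream() above might then look like the following sketch (buffer names are illustrative; |samples_per_channel| is always 10 ms at the input rate):
// 10 ms chunk: 44.1 kHz stereo in, 16 kHz mono out.
float* const src[2] = {left, right};  // 441 samples per channel.
float* const dest[1] = {mono_out};    // 160 samples.
apm->ProcessStream(src, 441, 44100, AudioProcessing::kStereo,
                   16000, AudioProcessing::kMono, dest);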
@ -342,11 +351,13 @@ class AudioProcessing {
kBadStreamParameterWarning = -13
};
enum {
enum NativeRate {
kSampleRate8kHz = 8000,
kSampleRate16kHz = 16000,
kSampleRate32kHz = 32000
};
static const int kChunkSizeMs = 10;
};
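Since kChunkSizeMs fixes the chunk length, the per-channel sample count of a chunk follows directly from the rate; the SamplesFromRate() helper in test_utils.h later in this change computes exactly this:
// Samples per channel in one 10 ms chunk:
// 8000 Hz -> 80, 16000 Hz -> 160, 32000 Hz -> 320, 48000 Hz -> 480.
int samples_per_channel =
    sample_rate_hz * AudioProcessing::kChunkSizeMs / 1000;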
// The acoustic echo cancellation (AEC) component provides better performance
@ -367,16 +378,10 @@ class EchoCancellation {
// render and capture devices are used, particularly with webcams.
//
// This enables a compensation mechanism, and requires that
// |set_device_sample_rate_hz()| and |set_stream_drift_samples()| be called.
// set_stream_drift_samples() be called.
virtual int enable_drift_compensation(bool enable) = 0;
virtual bool is_drift_compensation_enabled() const = 0;
// Provides the sampling rate of the audio devices. It is assumed the render
// and capture devices use the same nominal sample rate. Required if and only
// if drift compensation is enabled.
virtual int set_device_sample_rate_hz(int rate) = 0;
virtual int device_sample_rate_hz() const = 0;
// Sets the difference between the number of samples rendered and captured by
// the audio devices since the last call to |ProcessStream()|. Must be called
// if drift compensation is enabled, prior to |ProcessStream()|.
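With drift compensation enabled, the per-chunk call order used by the tests in this change is roughly the following sketch (not a normative sequence):
apm->echo_cancellation()->enable_drift_compensation(true);
// For each 10 ms chunk:
apm->AnalyzeReverseStream(render_frame);
apm->set_stream_delay_ms(delay_ms);
apm->echo_cancellation()->set_stream_drift_samples(drift_samples);
apm->ProcessStream(capture_frame);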

View File

@ -26,10 +26,6 @@ class MockEchoCancellation : public EchoCancellation {
int(bool enable));
MOCK_CONST_METHOD0(is_drift_compensation_enabled,
bool());
MOCK_METHOD1(set_device_sample_rate_hz,
int(int rate));
MOCK_CONST_METHOD0(device_sample_rate_hz,
int());
MOCK_METHOD1(set_stream_drift_samples,
void(int drift));
MOCK_CONST_METHOD0(stream_drift_samples,
@ -181,12 +177,13 @@ class MockAudioProcessing : public AudioProcessing {
MOCK_METHOD0(Initialize,
int());
MOCK_METHOD5(Initialize,
MOCK_METHOD6(Initialize,
int(int sample_rate_hz,
int output_sample_rate_hz,
int reverse_sample_rate_hz,
int num_input_channels,
int num_output_channels,
int num_reverse_channels));
ChannelLayout input_layout,
ChannelLayout output_layout,
ChannelLayout reverse_layout));
MOCK_METHOD1(SetExtraOptions,
void(const Config& config));
MOCK_METHOD1(EnableExperimentalNs,
@ -195,18 +192,16 @@ class MockAudioProcessing : public AudioProcessing {
bool());
MOCK_METHOD1(set_sample_rate_hz,
int(int rate));
MOCK_CONST_METHOD0(sample_rate_hz,
MOCK_CONST_METHOD0(input_sample_rate_hz,
int());
MOCK_CONST_METHOD0(split_sample_rate_hz,
MOCK_CONST_METHOD0(proc_sample_rate_hz,
int());
MOCK_CONST_METHOD0(proc_split_sample_rate_hz,
int());
MOCK_METHOD2(set_num_channels,
int(int input_channels, int output_channels));
MOCK_CONST_METHOD0(num_input_channels,
int());
MOCK_CONST_METHOD0(num_output_channels,
int());
MOCK_METHOD1(set_num_reverse_channels,
int(int channels));
MOCK_CONST_METHOD0(num_reverse_channels,
int());
MOCK_METHOD1(set_output_will_be_muted,
@ -215,10 +210,14 @@ class MockAudioProcessing : public AudioProcessing {
bool());
MOCK_METHOD1(ProcessStream,
int(AudioFrame* frame));
MOCK_METHOD5(ProcessStream,
int(float* const* data, int frames, int sample_rate_hz,
MOCK_METHOD7(ProcessStream,
int(const float* const* src,
int samples_per_channel,
int input_sample_rate_hz,
ChannelLayout input_layout,
ChannelLayout output_layout));
int output_sample_rate_hz,
ChannelLayout output_layout,
float* const* dest));
MOCK_METHOD1(AnalyzeReverseStream,
int(AudioFrame* frame));
MOCK_METHOD4(AnalyzeReverseStream,

View File

@ -151,9 +151,11 @@ void NoiseSuppressionImpl::DestroyHandle(void* handle) const {
int NoiseSuppressionImpl::InitializeHandle(void* handle) const {
#if defined(WEBRTC_NS_FLOAT)
return WebRtcNs_Init(static_cast<Handle*>(handle), apm_->sample_rate_hz());
return WebRtcNs_Init(static_cast<Handle*>(handle),
apm_->proc_sample_rate_hz());
#elif defined(WEBRTC_NS_FIXED)
return WebRtcNsx_Init(static_cast<Handle*>(handle), apm_->sample_rate_hz());
return WebRtcNsx_Init(static_cast<Handle*>(handle),
apm_->proc_sample_rate_hz());
#endif
}

View File

@ -8,11 +8,15 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include <math.h>
#include <stdio.h>
#include <algorithm>
#include <limits>
#include <queue>
#include "webrtc/common_audio/include/audio_util.h"
#include "webrtc/common_audio/resampler/include/push_resampler.h"
#include "webrtc/common_audio/resampler/push_sinc_resampler.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_processing/include/audio_processing.h"
#include "webrtc/modules/audio_processing/test/test_utils.h"
@ -61,33 +65,39 @@ const int kProcessSampleRates[] = {8000, 16000, 32000};
const size_t kProcessSampleRatesSize = sizeof(kProcessSampleRates) /
sizeof(*kProcessSampleRates);
void ConvertToFloat(const AudioFrame& frame, ChannelBuffer<float>* cb) {
ChannelBuffer<int16_t> cb_int(frame.samples_per_channel_,
frame.num_channels_);
Deinterleave(frame.data_,
frame.samples_per_channel_,
frame.num_channels_,
void ConvertToFloat(const int16_t* int_data, ChannelBuffer<float>* cb) {
ChannelBuffer<int16_t> cb_int(cb->samples_per_channel(),
cb->num_channels());
Deinterleave(int_data,
cb->samples_per_channel(),
cb->num_channels(),
cb_int.channels());
ScaleToFloat(cb_int.data(),
frame.samples_per_channel_ * frame.num_channels_,
cb->samples_per_channel() * cb->num_channels(),
cb->data());
}
void ConvertToFloat(const AudioFrame& frame, ChannelBuffer<float>* cb) {
ConvertToFloat(frame.data_, cb);
}
int TruncateToMultipleOf10(int value) {
return (value / 10) * 10;
}
// TODO(andrew): Use the MonoToStereo routine from AudioFrameOperations.
void MixStereoToMono(const int16_t* stereo,
int16_t* mono,
void MixStereoToMono(const float* stereo, float* mono,
int samples_per_channel) {
for (int i = 0; i < samples_per_channel; i++) {
int32_t mono_s32 = (static_cast<int32_t>(stereo[i * 2]) +
static_cast<int32_t>(stereo[i * 2 + 1])) >> 1;
mono[i] = static_cast<int16_t>(mono_s32);
for (int i = 0; i < samples_per_channel; ++i) {
mono[i] = (stereo[i * 2] + stereo[i * 2 + 1]) / 2;
}
}
void MixStereoToMono(const int16_t* stereo, int16_t* mono,
int samples_per_channel) {
for (int i = 0; i < samples_per_channel; i++)
mono[i] = (stereo[i * 2] + stereo[i * 2 + 1]) >> 1;
}
void CopyLeftToRightChannel(int16_t* stereo, int samples_per_channel) {
for (int i = 0; i < samples_per_channel; i++) {
stereo[i * 2 + 1] = stereo[i * 2];
@ -211,6 +221,33 @@ void OpenFileAndWriteMessage(const std::string filename,
}
#endif // WEBRTC_AUDIOPROC_BIT_EXACT
std::string ResourceFilePath(std::string name, int sample_rate_hz) {
std::ostringstream ss;
// Resource files are all stereo.
ss << name << sample_rate_hz / 1000 << "_stereo";
return test::ResourcePath(ss.str(), "pcm");
}
std::string OutputFilePath(std::string name,
int sample_rate_hz,
int num_input_channels,
int num_output_channels,
int num_reverse_channels) {
std::ostringstream ss;
ss << name << sample_rate_hz / 1000 << "_" << num_reverse_channels << "r" <<
num_input_channels << "i" << "_";
if (num_output_channels == 1) {
ss << "mono";
} else if (num_output_channels == 2) {
ss << "stereo";
} else {
assert(false);
}
ss << ".pcm";
return test::OutputPath() + ss.str();
}
void OpenFileAndReadMessage(const std::string filename,
::google::protobuf::MessageLite* msg) {
FILE* file = fopen(filename.c_str(), "rb");
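As an example of the naming scheme above, OutputFilePath("out", 16000, 2, 1, 2) yields a path ending in out16_2r2i_mono.pcm: 16 kHz, two reverse channels, two input channels, mono output.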
@ -242,18 +279,13 @@ class ApmTest : public ::testing::Test {
};
void Init(int sample_rate_hz,
int output_sample_rate_hz,
int reverse_sample_rate_hz,
int num_reverse_channels,
int num_input_channels,
int num_output_channels,
bool open_output_file);
void Init(AudioProcessing* ap);
std::string ResourceFilePath(std::string name, int sample_rate_hz);
std::string OutputFilePath(std::string name,
int sample_rate_hz,
int num_reverse_channels,
int num_input_channels,
int num_output_channels);
void EnableAllComponents();
bool ReadFrame(FILE* file, AudioFrame* frame);
bool ReadFrame(FILE* file, AudioFrame* frame, ChannelBuffer<float>* cb);
@ -268,7 +300,6 @@ class ApmTest : public ::testing::Test {
void RunQuantizedVolumeDoesNotGetStuckTest(int sample_rate);
void RunManualVolumeChangeIsPossibleTest(int sample_rate);
void StreamParametersTest(Format format);
void SampleRatesTest(Format format);
int ProcessStreamChooser(Format format);
int AnalyzeReverseStreamChooser(Format format);
void ProcessDebugDump(const std::string& in_filename,
@ -284,6 +315,7 @@ class ApmTest : public ::testing::Test {
AudioFrame* revframe_;
scoped_ptr<ChannelBuffer<float> > float_cb_;
scoped_ptr<ChannelBuffer<float> > revfloat_cb_;
int output_sample_rate_hz_;
int num_output_channels_;
FILE* far_file_;
FILE* near_file_;
@ -300,6 +332,7 @@ ApmTest::ApmTest()
#endif
frame_(NULL),
revframe_(NULL),
output_sample_rate_hz_(0),
num_output_channels_(0),
far_file_(NULL),
near_file_(NULL),
@ -316,9 +349,9 @@ void ApmTest::SetUp() {
revframe_ = new AudioFrame();
#if defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
Init(16000, 16000, 2, 2, 2, false);
Init(16000, 16000, 16000, 2, 2, 2, false);
#else
Init(32000, 32000, 2, 2, 2, false);
Init(32000, 32000, 32000, 2, 2, 2, false);
#endif
}
@ -349,49 +382,25 @@ void ApmTest::TearDown() {
out_file_ = NULL;
}
std::string ApmTest::ResourceFilePath(std::string name, int sample_rate_hz) {
std::ostringstream ss;
// Resource files are all stereo.
ss << name << sample_rate_hz / 1000 << "_stereo";
return test::ResourcePath(ss.str(), "pcm");
}
std::string ApmTest::OutputFilePath(std::string name,
int sample_rate_hz,
int num_reverse_channels,
int num_input_channels,
int num_output_channels) {
std::ostringstream ss;
ss << name << sample_rate_hz / 1000 << "_" << num_reverse_channels << "r" <<
num_input_channels << "i" << "_";
if (num_output_channels == 1) {
ss << "mono";
} else if (num_output_channels == 2) {
ss << "stereo";
} else {
assert(false);
return "";
}
ss << ".pcm";
return output_path_ + ss.str();
}
void ApmTest::Init(AudioProcessing* ap) {
ASSERT_EQ(ap->kNoError, ap->Initialize(frame_->sample_rate_hz_,
revframe_->sample_rate_hz_,
frame_->num_channels_,
num_output_channels_,
revframe_->num_channels_));
ASSERT_EQ(kNoErr,
ap->Initialize(frame_->sample_rate_hz_,
output_sample_rate_hz_,
revframe_->sample_rate_hz_,
LayoutFromChannels(frame_->num_channels_),
LayoutFromChannels(num_output_channels_),
LayoutFromChannels(revframe_->num_channels_)));
}
void ApmTest::Init(int sample_rate_hz,
int output_sample_rate_hz,
int reverse_sample_rate_hz,
int num_input_channels,
int num_output_channels,
int num_reverse_channels,
bool open_output_file) {
SetContainerFormat(sample_rate_hz, num_input_channels, frame_, &float_cb_);
output_sample_rate_hz_ = output_sample_rate_hz;
num_output_channels_ = num_output_channels;
SetContainerFormat(reverse_sample_rate_hz, num_reverse_channels, revframe_,
@ -418,8 +427,8 @@ void ApmTest::Init(int sample_rate_hz,
if (out_file_) {
ASSERT_EQ(0, fclose(out_file_));
}
filename = OutputFilePath("out", sample_rate_hz, num_reverse_channels,
num_input_channels, num_output_channels);
filename = OutputFilePath("out", sample_rate_hz, num_input_channels,
num_output_channels, num_reverse_channels);
out_file_ = fopen(filename.c_str(), "wb");
ASSERT_TRUE(out_file_ != NULL) << "Could not open file " <<
filename << "\n";
@ -485,12 +494,13 @@ int ApmTest::ProcessStreamChooser(Format format) {
if (format == kIntFormat) {
return apm_->ProcessStream(frame_);
}
// TODO(ajm): Update to match the number of output channels when supported.
return apm_->ProcessStream(float_cb_->channels(),
frame_->samples_per_channel_,
frame_->sample_rate_hz_,
LayoutFromChannels(frame_->num_channels_),
LayoutFromChannels(frame_->num_channels_));
output_sample_rate_hz_,
LayoutFromChannels(num_output_channels_),
float_cb_->channels());
}
int ApmTest::AnalyzeReverseStreamChooser(Format format) {
@ -726,27 +736,19 @@ TEST_F(ApmTest, Channels) {
}
}
void ApmTest::SampleRatesTest(Format format) {
TEST_F(ApmTest, SampleRatesInt) {
// Testing invalid sample rates
SetContainerFormat(10000, 2, frame_, &float_cb_);
EXPECT_EQ(apm_->kBadSampleRateError, ProcessStreamChooser(format));
EXPECT_EQ(apm_->kBadSampleRateError, ProcessStreamChooser(kIntFormat));
// Testing valid sample rates
int fs[] = {8000, 16000, 32000};
for (size_t i = 0; i < sizeof(fs) / sizeof(*fs); i++) {
SetContainerFormat(fs[i], 2, frame_, &float_cb_);
EXPECT_NOERR(ProcessStreamChooser(format));
EXPECT_EQ(fs[i], apm_->sample_rate_hz());
EXPECT_NOERR(ProcessStreamChooser(kIntFormat));
EXPECT_EQ(fs[i], apm_->input_sample_rate_hz());
}
}
TEST_F(ApmTest, SampleRatesInt) {
SampleRatesTest(kIntFormat);
}
TEST_F(ApmTest, SampleRatesFloat) {
SampleRatesTest(kFloatFormat);
}
TEST_F(ApmTest, EchoCancellation) {
EXPECT_EQ(apm_->kNoError,
apm_->echo_cancellation()->enable_drift_compensation(true));
@ -755,19 +757,6 @@ TEST_F(ApmTest, EchoCancellation) {
apm_->echo_cancellation()->enable_drift_compensation(false));
EXPECT_FALSE(apm_->echo_cancellation()->is_drift_compensation_enabled());
EXPECT_EQ(apm_->kBadParameterError,
apm_->echo_cancellation()->set_device_sample_rate_hz(4000));
EXPECT_EQ(apm_->kBadParameterError,
apm_->echo_cancellation()->set_device_sample_rate_hz(100000));
int rate[] = {16000, 44100, 48000};
for (size_t i = 0; i < sizeof(rate)/sizeof(*rate); i++) {
EXPECT_EQ(apm_->kNoError,
apm_->echo_cancellation()->set_device_sample_rate_hz(rate[i]));
EXPECT_EQ(rate[i],
apm_->echo_cancellation()->device_sample_rate_hz());
}
EchoCancellation::SuppressionLevel level[] = {
EchoCancellation::kLowSuppression,
EchoCancellation::kModerateSuppression,
@ -845,7 +834,13 @@ TEST_F(ApmTest, EchoCancellationReportsCorrectDelays) {
// within a valid region (set to +-1.5 blocks). Note that these cases are
// sampling frequency dependent.
for (size_t i = 0; i < kProcessSampleRatesSize; i++) {
Init(kProcessSampleRates[i], kProcessSampleRates[i], 2, 2, 2, false);
Init(kProcessSampleRates[i],
kProcessSampleRates[i],
kProcessSampleRates[i],
2,
2,
2,
false);
// Sampling frequency dependent variables.
const int num_ms_per_block = std::max(4,
640 / frame_->samples_per_channel_);
@ -898,7 +893,7 @@ TEST_F(ApmTest, EchoControlMobile) {
EXPECT_EQ(apm_->kUnsupportedComponentError, apm_->ProcessStream(frame_));
// Turn AECM on (and AEC off)
Init(16000, 16000, 2, 2, 2, false);
Init(16000, 16000, 16000, 2, 2, 2, false);
EXPECT_EQ(apm_->kNoError, apm_->echo_control_mobile()->Enable(true));
EXPECT_TRUE(apm_->echo_control_mobile()->is_enabled());
@ -926,8 +921,8 @@ TEST_F(ApmTest, EchoControlMobile) {
// Set and get echo path
const size_t echo_path_size =
apm_->echo_control_mobile()->echo_path_size_bytes();
scoped_array<char> echo_path_in(new char[echo_path_size]);
scoped_array<char> echo_path_out(new char[echo_path_size]);
scoped_ptr<char[]> echo_path_in(new char[echo_path_size]);
scoped_ptr<char[]> echo_path_out(new char[echo_path_size]);
EXPECT_EQ(apm_->kNullPointerError,
apm_->echo_control_mobile()->SetEchoPath(NULL, echo_path_size));
EXPECT_EQ(apm_->kNullPointerError,
@ -1061,7 +1056,7 @@ TEST_F(ApmTest, GainControl) {
}
void ApmTest::RunQuantizedVolumeDoesNotGetStuckTest(int sample_rate) {
Init(sample_rate, sample_rate, 2, 2, 2, false);
Init(sample_rate, sample_rate, sample_rate, 2, 2, 2, false);
EXPECT_EQ(apm_->kNoError,
apm_->gain_control()->set_mode(GainControl::kAdaptiveAnalog));
EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true));
@ -1092,7 +1087,7 @@ TEST_F(ApmTest, QuantizedVolumeDoesNotGetStuck) {
}
void ApmTest::RunManualVolumeChangeIsPossibleTest(int sample_rate) {
Init(sample_rate, sample_rate, 2, 2, 2, false);
Init(sample_rate, sample_rate, sample_rate, 2, 2, 2, false);
EXPECT_EQ(apm_->kNoError,
apm_->gain_control()->set_mode(GainControl::kAdaptiveAnalog));
EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true));
@ -1314,7 +1309,7 @@ TEST_F(ApmTest, AllProcessingDisabledByDefault) {
TEST_F(ApmTest, NoProcessingWhenAllComponentsDisabled) {
for (size_t i = 0; i < kSampleRatesSize; i++) {
Init(kSampleRates[i], kSampleRates[i], 2, 2, 2, false);
Init(kSampleRates[i], kSampleRates[i], kSampleRates[i], 2, 2, 2, false);
SetFrameTo(frame_, 1000, 2000);
AudioFrame frame_copy;
frame_copy.CopyFrom(*frame_);
@ -1329,23 +1324,29 @@ TEST_F(ApmTest, IdenticalInputChannelsResultInIdenticalOutputChannels) {
EnableAllComponents();
for (size_t i = 0; i < kProcessSampleRatesSize; i++) {
Init(kProcessSampleRates[i], kProcessSampleRates[i], 2, 2, 2, false);
Init(kProcessSampleRates[i],
kProcessSampleRates[i],
kProcessSampleRates[i],
2,
2,
2,
false);
int analog_level = 127;
EXPECT_EQ(0, feof(far_file_));
EXPECT_EQ(0, feof(near_file_));
ASSERT_EQ(0, feof(far_file_));
ASSERT_EQ(0, feof(near_file_));
while (ReadFrame(far_file_, revframe_) && ReadFrame(near_file_, frame_)) {
CopyLeftToRightChannel(revframe_->data_, revframe_->samples_per_channel_);
EXPECT_EQ(apm_->kNoError, apm_->AnalyzeReverseStream(revframe_));
ASSERT_EQ(kNoErr, apm_->AnalyzeReverseStream(revframe_));
CopyLeftToRightChannel(frame_->data_, frame_->samples_per_channel_);
frame_->vad_activity_ = AudioFrame::kVadUnknown;
EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(0));
ASSERT_EQ(kNoErr, apm_->set_stream_delay_ms(0));
apm_->echo_cancellation()->set_stream_drift_samples(0);
EXPECT_EQ(apm_->kNoError,
ASSERT_EQ(kNoErr,
apm_->gain_control()->set_stream_analog_level(analog_level));
EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
ASSERT_EQ(kNoErr, apm_->ProcessStream(frame_));
analog_level = apm_->gain_control()->stream_analog_level();
VerifyChannelsAreEqual(frame_->data_, frame_->samples_per_channel_);
@ -1442,7 +1443,13 @@ void ApmTest::ProcessDebugDump(const std::string& in_filename,
if (msg.has_reverse_sample_rate()) {
reverse_sample_rate = msg.reverse_sample_rate();
}
int output_sample_rate = msg.sample_rate();
if (msg.has_output_sample_rate()) {
output_sample_rate = msg.output_sample_rate();
}
Init(msg.sample_rate(),
output_sample_rate,
reverse_sample_rate,
msg.num_input_channels(),
msg.num_output_channels(),
@ -1644,11 +1651,12 @@ TEST_F(ApmTest, FloatAndIntInterfacesGiveIdenticalResults) {
const int num_render_channels = test->num_reverse_channels();
const int num_input_channels = test->num_input_channels();
const int num_output_channels = test->num_output_channels();
const int samples_per_channel = test->sample_rate() * kChunkSizeMs / 1000;
const int samples_per_channel = test->sample_rate() *
AudioProcessing::kChunkSizeMs / 1000;
const int output_length = samples_per_channel * num_output_channels;
Init(test->sample_rate(), test->sample_rate(), num_input_channels,
num_output_channels, num_render_channels, true);
Init(test->sample_rate(), test->sample_rate(), test->sample_rate(),
num_input_channels, num_output_channels, num_render_channels, true);
Init(fapm.get());
ChannelBuffer<int16_t> output_cb(samples_per_channel, num_input_channels);
@ -1674,12 +1682,15 @@ TEST_F(ApmTest, FloatAndIntInterfacesGiveIdenticalResults) {
EXPECT_NOERR(fapm->gain_control()->set_stream_analog_level(analog_level));
EXPECT_NOERR(apm_->ProcessStream(frame_));
// TODO(ajm): Update to support different output rates.
EXPECT_NOERR(fapm->ProcessStream(
float_cb_->channels(),
samples_per_channel,
test->sample_rate(),
LayoutFromChannels(num_input_channels),
LayoutFromChannels(num_output_channels)));
test->sample_rate(),
LayoutFromChannels(num_output_channels),
float_cb_->channels()));
// Convert to interleaved int16.
ScaleAndRoundToInt16(float_cb_->data(), output_length, output_cb.data());
@ -1746,8 +1757,13 @@ TEST_F(ApmTest, DISABLED_ON_ANDROID(Process)) {
if (test->num_input_channels() != test->num_output_channels())
continue;
Init(test->sample_rate(), test->sample_rate(), test->num_input_channels(),
test->num_output_channels(), test->num_reverse_channels(), true);
Init(test->sample_rate(),
test->sample_rate(),
test->sample_rate(),
test->num_input_channels(),
test->num_output_channels(),
test->num_reverse_channels(),
true);
int frame_count = 0;
int has_echo_count = 0;
@ -1890,8 +1906,453 @@ TEST_F(ApmTest, DISABLED_ON_ANDROID(Process)) {
OpenFileAndWriteMessage(ref_filename_, ref_data);
}
}
#endif // WEBRTC_AUDIOPROC_BIT_EXACT
// Reads a 10 ms chunk of int16 interleaved audio from the given (assumed
// stereo) file, converts to deinterleaved float (optionally downmixing) and
// returns the result in |cb|. Returns false if the file ended (or on error) and
// true otherwise.
//
// |int_data| and |float_data| are just temporary space that must be
// sufficiently large to hold the 10 ms chunk.
bool ReadChunk(FILE* file, int16_t* int_data, float* float_data,
ChannelBuffer<float>* cb) {
// The files always contain stereo audio.
size_t frame_size = cb->samples_per_channel() * 2;
size_t read_count = fread(int_data, sizeof(int16_t), frame_size, file);
if (read_count != frame_size) {
// Check that the file really ended.
assert(feof(file));
return false; // This is expected.
}
ScaleToFloat(int_data, frame_size, float_data);
if (cb->num_channels() == 1) {
MixStereoToMono(float_data, cb->data(), cb->samples_per_channel());
} else {
Deinterleave(float_data, cb->samples_per_channel(), 2,
cb->channels());
}
return true;
}
// Compares the reference and test arrays over a region around the expected
// delay. Finds the highest SNR in that region and adds the variance and squared
// error results to the supplied accumulators.
void UpdateBestSNR(const float* ref,
const float* test,
int length,
int expected_delay,
double* variance_acc,
double* sq_error_acc) {
double best_snr = std::numeric_limits<double>::min();
double best_variance = 0;
double best_sq_error = 0;
// Search over a region of +-4 samples around the expected delay.
for (int delay = std::max(expected_delay - 4, 0); delay <= expected_delay + 4;
++delay) {
double sq_error = 0;
double variance = 0;
for (int i = 0; i < length - delay; ++i) {
double error = test[i + delay] - ref[i];
sq_error += error * error;
variance += ref[i] * ref[i];
}
if (sq_error == 0) {
*variance_acc += variance;
return;
}
double snr = variance / sq_error;
if (snr > best_snr) {
best_snr = snr;
best_variance = variance;
best_sq_error = sq_error;
}
}
*variance_acc += best_variance;
*sq_error_acc += best_sq_error;
}
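Note the design choice here: |variance| and |sq_error| are accumulated across all chunks and converted to decibels only once at the end, via 10 * log10(variance / sq_error) in the test below, which yields an energy-weighted SNR over the whole file rather than an average of per-chunk SNRs.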
// Used to test a multitude of sample rate and channel combinations. It works
// by first producing a set of reference files (in SetUpTestCase) that are
// assumed to be correct, as the parameters they use are verified by other
// tests in this collection. Crucially, the reference files are all produced
// at "native" rates, which involve no resampling.
//
// Each test pass produces an output file in a particular format. The output
// is matched against the reference file closest to its internal processing
// format; if necessary, the output is first resampled to the reference rate.
// Due to resampling distortion, we don't expect identical results, but
// enforce SNR thresholds which vary depending on the format. An expected SNR
// of 0 is a special case meaning infinite SNR, i.e. zero error.
typedef std::tr1::tuple<int, int, int, double> AudioProcessingTestData;
class AudioProcessingTest
: public testing::TestWithParam<AudioProcessingTestData> {
public:
AudioProcessingTest()
: input_rate_(std::tr1::get<0>(GetParam())),
output_rate_(std::tr1::get<1>(GetParam())),
reverse_rate_(std::tr1::get<2>(GetParam())),
expected_snr_(std::tr1::get<3>(GetParam())) {}
virtual ~AudioProcessingTest() {}
static void SetUpTestCase() {
// Create all needed output reference files.
const int kNativeRates[] = {8000, 16000, 32000};
const size_t kNativeRatesSize =
sizeof(kNativeRates) / sizeof(*kNativeRates);
const int kNumChannels[] = {1, 2};
const size_t kNumChannelsSize =
sizeof(kNumChannels) / sizeof(*kNumChannels);
for (size_t i = 0; i < kNativeRatesSize; ++i) {
for (size_t j = 0; j < kNumChannelsSize; ++j) {
for (size_t k = 0; k < kNumChannelsSize; ++k) {
// The reference files always have matching input and output channels.
ProcessFormat(kNativeRates[i],
kNativeRates[i],
kNativeRates[i],
kNumChannels[j],
kNumChannels[j],
kNumChannels[k],
"ref");
}
}
}
}
// Runs a process pass on files with the given parameters and dumps the output
// to a file specified with |output_file_prefix|.
static void ProcessFormat(int input_rate,
int output_rate,
int reverse_rate,
int num_input_channels,
int num_output_channels,
int num_reverse_channels,
std::string output_file_prefix) {
scoped_ptr<AudioProcessing> ap(AudioProcessing::Create());
EnableAllAPComponents(ap.get());
ap->Initialize(input_rate,
output_rate,
reverse_rate,
LayoutFromChannels(num_input_channels),
LayoutFromChannels(num_output_channels),
LayoutFromChannels(num_reverse_channels));
FILE* far_file = fopen(ResourceFilePath("far", reverse_rate).c_str(), "rb");
FILE* near_file = fopen(ResourceFilePath("near", input_rate).c_str(), "rb");
FILE* out_file = fopen(OutputFilePath(output_file_prefix,
output_rate,
num_input_channels,
num_output_channels,
num_reverse_channels).c_str(), "wb");
ASSERT_TRUE(far_file != NULL);
ASSERT_TRUE(near_file != NULL);
ASSERT_TRUE(out_file != NULL);
ChannelBuffer<float> fwd_cb(SamplesFromRate(input_rate),
num_input_channels);
ChannelBuffer<float> rev_cb(SamplesFromRate(reverse_rate),
num_reverse_channels);
ChannelBuffer<float> out_cb(SamplesFromRate(output_rate),
num_output_channels);
// Temporary buffers.
const int max_length =
2 * std::max(out_cb.samples_per_channel(),
std::max(fwd_cb.samples_per_channel(),
rev_cb.samples_per_channel()));
scoped_ptr<float[]> float_data(new float[max_length]);
scoped_ptr<int16_t[]> int_data(new int16_t[max_length]);
int analog_level = 127;
while (ReadChunk(far_file, int_data.get(), float_data.get(), &rev_cb) &&
ReadChunk(near_file, int_data.get(), float_data.get(), &fwd_cb)) {
EXPECT_NOERR(ap->AnalyzeReverseStream(
rev_cb.channels(),
rev_cb.samples_per_channel(),
reverse_rate,
LayoutFromChannels(num_reverse_channels)));
EXPECT_NOERR(ap->set_stream_delay_ms(0));
ap->echo_cancellation()->set_stream_drift_samples(0);
EXPECT_NOERR(ap->gain_control()->set_stream_analog_level(analog_level));
EXPECT_NOERR(ap->ProcessStream(
fwd_cb.channels(),
fwd_cb.samples_per_channel(),
input_rate,
LayoutFromChannels(num_input_channels),
output_rate,
LayoutFromChannels(num_output_channels),
out_cb.channels()));
Interleave(out_cb.channels(),
out_cb.samples_per_channel(),
out_cb.num_channels(),
float_data.get());
// Dump output to file.
ASSERT_EQ(static_cast<size_t>(out_cb.length()),
fwrite(float_data.get(), sizeof(float_data[0]),
out_cb.length(), out_file));
analog_level = ap->gain_control()->stream_analog_level();
}
fclose(far_file);
fclose(near_file);
fclose(out_file);
}
protected:
int input_rate_;
int output_rate_;
int reverse_rate_;
double expected_snr_;
};
TEST_P(AudioProcessingTest, Formats) {
struct ChannelFormat {
int num_input;
int num_output;
int num_reverse;
};
ChannelFormat cf[] = {
{1, 1, 1},
{1, 1, 2},
{2, 1, 1},
{2, 1, 2},
{2, 2, 1},
{2, 2, 2},
};
size_t channel_format_size = sizeof(cf) / sizeof(*cf);
for (size_t i = 0; i < channel_format_size; ++i) {
ProcessFormat(input_rate_,
output_rate_,
reverse_rate_,
cf[i].num_input,
cf[i].num_output,
cf[i].num_reverse,
"out");
int min_ref_rate = std::min(input_rate_, output_rate_);
int ref_rate;
if (min_ref_rate > 16000) {
ref_rate = 32000;
} else if (min_ref_rate > 8000) {
ref_rate = 16000;
} else {
ref_rate = 8000;
}
#ifdef WEBRTC_AUDIOPROC_FIXED_PROFILE
ref_rate = std::min(ref_rate, 16000);
#endif
FILE* out_file = fopen(OutputFilePath("out",
output_rate_,
cf[i].num_input,
cf[i].num_output,
cf[i].num_reverse).c_str(), "rb");
// The reference files always have matching input and output channels.
FILE* ref_file = fopen(OutputFilePath("ref",
ref_rate,
cf[i].num_output,
cf[i].num_output,
cf[i].num_reverse).c_str(), "rb");
ASSERT_TRUE(out_file != NULL);
ASSERT_TRUE(ref_file != NULL);
const int ref_length = SamplesFromRate(ref_rate) * cf[i].num_output;
const int out_length = SamplesFromRate(output_rate_) * cf[i].num_output;
// Data from the reference file.
scoped_ptr<float[]> ref_data(new float[ref_length]);
// Data from the output file.
scoped_ptr<float[]> out_data(new float[out_length]);
// Data from the resampled output, in case the reference and output rates
// don't match.
scoped_ptr<float[]> cmp_data(new float[ref_length]);
PushResampler<float> resampler;
resampler.InitializeIfNeeded(output_rate_, ref_rate, cf[i].num_output);
// Compute the resampling delay of the output relative to the reference,
// to find the region over which we should search for the best SNR.
float expected_delay_sec = 0;
if (input_rate_ != ref_rate) {
// Input resampling delay.
expected_delay_sec +=
PushSincResampler::AlgorithmicDelaySeconds(input_rate_);
}
if (output_rate_ != ref_rate) {
// Output resampling delay.
expected_delay_sec +=
PushSincResampler::AlgorithmicDelaySeconds(ref_rate);
// Delay of converting the output back to its processing rate for testing.
expected_delay_sec +=
PushSincResampler::AlgorithmicDelaySeconds(output_rate_);
}
int expected_delay = floor(expected_delay_sec * ref_rate + 0.5f) *
cf[i].num_output;
double variance = 0;
double sq_error = 0;
while (fread(out_data.get(), sizeof(out_data[0]), out_length, out_file) &&
fread(ref_data.get(), sizeof(ref_data[0]), ref_length, ref_file)) {
float* out_ptr = out_data.get();
if (output_rate_ != ref_rate) {
// Resample the output back to its internal processing rate if necessary.
ASSERT_EQ(ref_length, resampler.Resample(out_ptr,
out_length,
cmp_data.get(),
ref_length));
out_ptr = cmp_data.get();
}
// Update the |sq_error| and |variance| accumulators with the highest SNR
// of reference vs output.
UpdateBestSNR(ref_data.get(),
out_ptr,
ref_length,
expected_delay,
&variance,
&sq_error);
}
std::cout << "(" << input_rate_ << ", "
<< output_rate_ << ", "
<< reverse_rate_ << ", "
<< cf[i].num_input << ", "
<< cf[i].num_output << ", "
<< cf[i].num_reverse << "): ";
if (sq_error > 0) {
double snr = 10 * log10(variance / sq_error);
EXPECT_GE(snr, expected_snr_);
EXPECT_NE(0, expected_snr_);
std::cout << "SNR=" << snr << " dB" << std::endl;
} else {
EXPECT_EQ(expected_snr_, 0);
std::cout << "SNR=" << "inf dB" << std::endl;
}
fclose(out_file);
fclose(ref_file);
}
}
#if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
INSTANTIATE_TEST_CASE_P(
CommonFormats, AudioProcessingTest, testing::Values(
std::tr1::make_tuple(48000, 48000, 48000, 25),
std::tr1::make_tuple(48000, 48000, 32000, 25),
std::tr1::make_tuple(48000, 48000, 16000, 25),
std::tr1::make_tuple(48000, 44100, 48000, 20),
std::tr1::make_tuple(48000, 44100, 32000, 20),
std::tr1::make_tuple(48000, 44100, 16000, 20),
std::tr1::make_tuple(48000, 32000, 48000, 25),
std::tr1::make_tuple(48000, 32000, 32000, 25),
std::tr1::make_tuple(48000, 32000, 16000, 25),
std::tr1::make_tuple(48000, 16000, 48000, 25),
std::tr1::make_tuple(48000, 16000, 32000, 25),
std::tr1::make_tuple(48000, 16000, 16000, 25),
std::tr1::make_tuple(44100, 48000, 48000, 20),
std::tr1::make_tuple(44100, 48000, 32000, 20),
std::tr1::make_tuple(44100, 48000, 16000, 20),
std::tr1::make_tuple(44100, 44100, 48000, 20),
std::tr1::make_tuple(44100, 44100, 32000, 20),
std::tr1::make_tuple(44100, 44100, 16000, 20),
std::tr1::make_tuple(44100, 32000, 48000, 20),
std::tr1::make_tuple(44100, 32000, 32000, 20),
std::tr1::make_tuple(44100, 32000, 16000, 20),
std::tr1::make_tuple(44100, 16000, 48000, 20),
std::tr1::make_tuple(44100, 16000, 32000, 20),
std::tr1::make_tuple(44100, 16000, 16000, 20),
std::tr1::make_tuple(32000, 48000, 48000, 25),
std::tr1::make_tuple(32000, 48000, 32000, 25),
std::tr1::make_tuple(32000, 48000, 16000, 25),
std::tr1::make_tuple(32000, 44100, 48000, 20),
std::tr1::make_tuple(32000, 44100, 32000, 20),
std::tr1::make_tuple(32000, 44100, 16000, 20),
std::tr1::make_tuple(32000, 32000, 48000, 30),
std::tr1::make_tuple(32000, 32000, 32000, 0),
std::tr1::make_tuple(32000, 32000, 16000, 30),
std::tr1::make_tuple(32000, 16000, 48000, 25),
std::tr1::make_tuple(32000, 16000, 32000, 25),
std::tr1::make_tuple(32000, 16000, 16000, 25),
std::tr1::make_tuple(16000, 48000, 48000, 25),
std::tr1::make_tuple(16000, 48000, 32000, 25),
std::tr1::make_tuple(16000, 48000, 16000, 25),
std::tr1::make_tuple(16000, 44100, 48000, 15),
std::tr1::make_tuple(16000, 44100, 32000, 15),
std::tr1::make_tuple(16000, 44100, 16000, 15),
std::tr1::make_tuple(16000, 32000, 48000, 25),
std::tr1::make_tuple(16000, 32000, 32000, 25),
std::tr1::make_tuple(16000, 32000, 16000, 25),
std::tr1::make_tuple(16000, 16000, 48000, 30),
std::tr1::make_tuple(16000, 16000, 32000, 30),
std::tr1::make_tuple(16000, 16000, 16000, 0)));
#elif defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
INSTANTIATE_TEST_CASE_P(
CommonFormats, AudioProcessingTest, testing::Values(
std::tr1::make_tuple(48000, 48000, 48000, 20),
std::tr1::make_tuple(48000, 48000, 32000, 20),
std::tr1::make_tuple(48000, 48000, 16000, 20),
std::tr1::make_tuple(48000, 44100, 48000, 15),
std::tr1::make_tuple(48000, 44100, 32000, 15),
std::tr1::make_tuple(48000, 44100, 16000, 15),
std::tr1::make_tuple(48000, 32000, 48000, 20),
std::tr1::make_tuple(48000, 32000, 32000, 20),
std::tr1::make_tuple(48000, 32000, 16000, 20),
std::tr1::make_tuple(48000, 16000, 48000, 20),
std::tr1::make_tuple(48000, 16000, 32000, 20),
std::tr1::make_tuple(48000, 16000, 16000, 20),
std::tr1::make_tuple(44100, 48000, 48000, 19),
std::tr1::make_tuple(44100, 48000, 32000, 19),
std::tr1::make_tuple(44100, 48000, 16000, 19),
std::tr1::make_tuple(44100, 44100, 48000, 15),
std::tr1::make_tuple(44100, 44100, 32000, 15),
std::tr1::make_tuple(44100, 44100, 16000, 15),
std::tr1::make_tuple(44100, 32000, 48000, 19),
std::tr1::make_tuple(44100, 32000, 32000, 19),
std::tr1::make_tuple(44100, 32000, 16000, 19),
std::tr1::make_tuple(44100, 16000, 48000, 19),
std::tr1::make_tuple(44100, 16000, 32000, 19),
std::tr1::make_tuple(44100, 16000, 16000, 19),
std::tr1::make_tuple(32000, 48000, 48000, 19),
std::tr1::make_tuple(32000, 48000, 32000, 19),
std::tr1::make_tuple(32000, 48000, 16000, 19),
std::tr1::make_tuple(32000, 44100, 48000, 15),
std::tr1::make_tuple(32000, 44100, 32000, 15),
std::tr1::make_tuple(32000, 44100, 16000, 15),
std::tr1::make_tuple(32000, 32000, 48000, 19),
std::tr1::make_tuple(32000, 32000, 32000, 19),
std::tr1::make_tuple(32000, 32000, 16000, 19),
std::tr1::make_tuple(32000, 16000, 48000, 19),
std::tr1::make_tuple(32000, 16000, 32000, 19),
std::tr1::make_tuple(32000, 16000, 16000, 19),
std::tr1::make_tuple(16000, 48000, 48000, 25),
std::tr1::make_tuple(16000, 48000, 32000, 25),
std::tr1::make_tuple(16000, 48000, 16000, 25),
std::tr1::make_tuple(16000, 44100, 48000, 15),
std::tr1::make_tuple(16000, 44100, 32000, 15),
std::tr1::make_tuple(16000, 44100, 16000, 15),
std::tr1::make_tuple(16000, 32000, 48000, 25),
std::tr1::make_tuple(16000, 32000, 32000, 25),
std::tr1::make_tuple(16000, 32000, 16000, 25),
std::tr1::make_tuple(16000, 16000, 48000, 30),
std::tr1::make_tuple(16000, 16000, 32000, 30),
std::tr1::make_tuple(16000, 16000, 16000, 0)));
#endif
// TODO(henrike): re-implement functionality lost when removing the old main
// function. See
// https://code.google.com/p/webrtc/issues/detail?id=1981

View File

@ -155,7 +155,6 @@ void void_main(int argc, char* argv[]) {
const char* aecm_echo_path_out_filename = NULL;
int32_t sample_rate_hz = 16000;
int32_t device_sample_rate_hz = 16000;
int num_capture_input_channels = 1;
int num_capture_output_channels = 1;
@ -563,6 +562,8 @@ void void_main(int argc, char* argv[]) {
Event event_msg;
scoped_ptr<ChannelBuffer<float> > reverse_cb;
scoped_ptr<ChannelBuffer<float> > primary_cb;
int output_sample_rate = 32000;
AudioProcessing::ChannelLayout output_layout = AudioProcessing::kMono;
while (ReadMessageFromFile(pb_file, &event_msg)) {
std::ostringstream trace_stream;
trace_stream << "Processed frames: " << reverse_count << " (reverse), "
@ -578,18 +579,21 @@ void void_main(int argc, char* argv[]) {
ASSERT_TRUE(msg.has_num_output_channels());
ASSERT_TRUE(msg.has_num_reverse_channels());
int reverse_sample_rate = msg.sample_rate();
if (msg.has_reverse_sample_rate())
if (msg.has_reverse_sample_rate()) {
reverse_sample_rate = msg.reverse_sample_rate();
ASSERT_EQ(apm->kNoError, apm->Initialize(msg.sample_rate(),
reverse_sample_rate,
msg.num_input_channels(),
msg.num_output_channels(),
msg.num_reverse_channels()));
ASSERT_TRUE(msg.has_device_sample_rate());
ASSERT_EQ(apm->kNoError,
apm->echo_cancellation()->set_device_sample_rate_hz(
msg.device_sample_rate()));
}
output_sample_rate = msg.sample_rate();
if (msg.has_output_sample_rate()) {
output_sample_rate = msg.output_sample_rate();
}
output_layout = LayoutFromChannels(msg.num_output_channels());
ASSERT_EQ(kNoErr, apm->Initialize(
msg.sample_rate(),
output_sample_rate,
reverse_sample_rate,
LayoutFromChannels(msg.num_input_channels()),
output_layout,
LayoutFromChannels(msg.num_reverse_channels())));
samples_per_channel = msg.sample_rate() / 100;
far_frame.sample_rate_hz_ = msg.sample_rate();
@ -606,11 +610,13 @@ void void_main(int argc, char* argv[]) {
if (verbose) {
printf("Init at frame: %d (primary), %d (reverse)\n",
primary_count, reverse_count);
printf(" Sample rate: %d Hz\n", msg.sample_rate());
printf(" Primary rates: %d Hz (in), %d Hz (out)\n",
msg.sample_rate(), output_sample_rate);
printf(" Primary channels: %d (in), %d (out)\n",
msg.num_input_channels(),
msg.num_output_channels());
printf(" Reverse channels: %d \n", msg.num_reverse_channels());
printf(" Reverse rate: %d\n", reverse_sample_rate);
printf(" Reverse channels: %d\n", msg.num_reverse_channels());
}
} else if (event_msg.type() == Event::REVERSE_STREAM) {
@ -715,7 +721,9 @@ void void_main(int argc, char* argv[]) {
near_frame.samples_per_channel_,
near_frame.sample_rate_hz_,
LayoutFromChannels(near_frame.num_channels_),
LayoutFromChannels(apm->num_output_channels()));
output_sample_rate,
output_layout,
primary_cb->channels());
}
if (err == apm->kBadStreamParameterWarning) {
@ -814,19 +822,20 @@ void void_main(int argc, char* argv[]) {
fread(&sample_rate_hz, sizeof(sample_rate_hz), 1, event_file));
samples_per_channel = sample_rate_hz / 100;
int32_t unused_device_sample_rate_hz;
ASSERT_EQ(1u,
fread(&device_sample_rate_hz,
sizeof(device_sample_rate_hz),
fread(&unused_device_sample_rate_hz,
sizeof(unused_device_sample_rate_hz),
1,
event_file));
// TODO(bjornv): Replace set_sample_rate_hz() when we have a smarter
// AnalyzeReverseStream().
ASSERT_EQ(apm->kNoError, apm->set_sample_rate_hz(sample_rate_hz));
ASSERT_EQ(apm->kNoError,
apm->echo_cancellation()->set_device_sample_rate_hz(
device_sample_rate_hz));
ASSERT_EQ(kNoErr, apm->Initialize(
sample_rate_hz,
sample_rate_hz,
sample_rate_hz,
LayoutFromChannels(num_capture_input_channels),
LayoutFromChannels(num_capture_output_channels),
LayoutFromChannels(num_render_channels)));
far_frame.sample_rate_hz_ = sample_rate_hz;
far_frame.samples_per_channel_ = samples_per_channel;

View File

@ -9,6 +9,7 @@
*/
#include "webrtc/audio_processing/debug.pb.h"
#include "webrtc/modules/audio_processing/common.h"
#include "webrtc/modules/audio_processing/include/audio_processing.h"
#include "webrtc/modules/interface/module_common_types.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
@ -18,37 +19,6 @@ namespace webrtc {
static const AudioProcessing::Error kNoErr = AudioProcessing::kNoError;
#define EXPECT_NOERR(expr) EXPECT_EQ(kNoErr, (expr))
static const int kChunkSizeMs = 10;
// Helper to encapsulate a contiguous data buffer with access to a pointer
// array of the deinterleaved channels.
template <typename T>
class ChannelBuffer {
public:
ChannelBuffer(int samples_per_channel, int num_channels)
: data_(new T[samples_per_channel * num_channels]),
channels_(new T*[num_channels]),
samples_per_channel_(samples_per_channel) {
memset(data_.get(), 0, sizeof(T) * samples_per_channel * num_channels);
for (int i = 0; i < num_channels; ++i)
channels_[i] = &data_[i * samples_per_channel];
}
~ChannelBuffer() {}
void CopyFrom(const void* channel_ptr, int index) {
memcpy(channels_[index], channel_ptr, samples_per_channel_ * sizeof(T));
}
T* data() { return data_.get(); }
T* channel(int index) { return channels_[index]; }
T** channels() { return channels_.get(); }
private:
scoped_ptr<T[]> data_;
scoped_ptr<T*[]> channels_;
int samples_per_channel_;
};
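Before its removal here in favor of the shared ChannelBuffer this change introduces, typical usage in the tests looked like the following sketch (illustrative values):
ChannelBuffer<float> cb(320, 2);  // One 10 ms stereo chunk at 32 kHz.
Deinterleave(interleaved, 320, 2, cb.channels());
float* left = cb.channel(0);      // Contiguous per-channel access.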
// Exits on failure; do not use in unit tests.
static inline FILE* OpenFile(const std::string& filename, const char* mode) {
FILE* file = fopen(filename.c_str(), mode);
@ -59,10 +29,15 @@ static inline FILE* OpenFile(const std::string& filename, const char* mode) {
return file;
}
static inline int SamplesFromRate(int rate) {
return AudioProcessing::kChunkSizeMs * rate / 1000;
}
static inline void SetFrameSampleRate(AudioFrame* frame,
int sample_rate_hz) {
frame->sample_rate_hz_ = sample_rate_hz;
frame->samples_per_channel_ = kChunkSizeMs * sample_rate_hz / 1000;
frame->samples_per_channel_ = AudioProcessing::kChunkSizeMs *
sample_rate_hz / 1000;
}
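Usage is then a one-liner in the tests, e.g. SetFrameSampleRate(&frame, 48000) sets samples_per_channel_ to 480.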
template <typename T>

View File

@ -165,8 +165,6 @@ while (ReadMessageFromFile(debug_file, &event_msg)) {
// These should print out zeros if they're missing.
fprintf(settings_file, "Init at frame: %d\n", frame_count);
fprintf(settings_file, " Sample rate: %d\n", msg.sample_rate());
fprintf(settings_file, " Device sample rate: %d\n",
msg.device_sample_rate());
fprintf(settings_file, " Input channels: %d\n",
msg.num_input_channels());
fprintf(settings_file, " Output channels: %d\n",

View File

@ -70,7 +70,7 @@ int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
// TODO(ajm): concatenate data in frame buffer here.
int vad_ret = WebRtcVad_Process(static_cast<Handle*>(handle(0)),
apm_->split_sample_rate_hz(),
apm_->proc_split_sample_rate_hz(),
mixed_data,
frame_size_samples_);
if (vad_ret == 0) {
@ -146,7 +146,8 @@ int VoiceDetectionImpl::Initialize() {
}
using_external_vad_ = false;
frame_size_samples_ = frame_size_ms_ * (apm_->split_sample_rate_hz() / 1000);
frame_size_samples_ = frame_size_ms_ *
apm_->proc_split_sample_rate_hz() / 1000;
// TODO(ajm): initialize frame buffer here.
return apm_->kNoError;

View File

@ -46,12 +46,16 @@
'../../resources/deflicker_before_cif_short.yuv',
'../../resources/far16_stereo.pcm',
'../../resources/far32_stereo.pcm',
'../../resources/far44_stereo.pcm',
'../../resources/far48_stereo.pcm',
'../../resources/far8_stereo.pcm',
'../../resources/foremanColorEnhanced_cif_short.yuv',
'../../resources/foreman_cif.yuv',
'../../resources/foreman_cif_short.yuv',
'../../resources/near16_stereo.pcm',
'../../resources/near32_stereo.pcm',
'../../resources/near44_stereo.pcm',
'../../resources/near48_stereo.pcm',
'../../resources/near8_stereo.pcm',
'../../resources/ref03.aecdump',
'../../resources/remote_bitrate_estimator/VideoSendersTest_BweTest_IncreasingChoke1_0_AST.bin',

View File

@ -602,7 +602,7 @@ void OutputMixer::APMAnalyzeReverseStream() {
// side. Downmix to mono.
AudioFrame frame;
frame.num_channels_ = 1;
frame.sample_rate_hz_ = _audioProcessingModulePtr->sample_rate_hz();
frame.sample_rate_hz_ = _audioProcessingModulePtr->input_sample_rate_hz();
RemixAndResample(_audioFrame, &audioproc_resampler_, &frame);
if (_audioProcessingModulePtr->AnalyzeReverseStream(&frame) == -1) {

View File

@ -438,11 +438,6 @@ int VoEBaseImpl::Init(AudioDeviceModule* external_adm,
// Set the error state for any failures in this block.
_shared->SetLastError(VE_APM_ERROR);
if (audioproc->echo_cancellation()->set_device_sample_rate_hz(48000)) {
LOG_FERR1(LS_ERROR, set_device_sample_rate_hz, 48000);
return -1;
}
// Configure AudioProcessing components.
if (audioproc->high_pass_filter()->Enable(true) != 0) {
LOG_FERR1(LS_ERROR, high_pass_filter()->Enable, true);