Audio processing: Feed each processing step its choice of int or float data

Each audio processing step is given a pointer to an AudioBuffer, where
it can read and write int data. This patch adds corresponding
AudioBuffer methods to read and write float data; the buffer will
automatically convert the stored data between int and float as
necessary.
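
As an illustration (not part of this patch), a processing step that prefers
floats can now operate directly on the buffer; the gain step below is
hypothetical, but data_f() is one of the accessors this patch adds:

    // Hypothetical processing step, shown only to illustrate the new API.
    // data_f() converts the stored int data to float on first access; the
    // conversion back (with saturation) happens when an int accessor is used.
    void ApplyGain(AudioBuffer* audio, int num_channels, int samples, float gain) {
      for (int i = 0; i < num_channels; ++i) {
        float* ch = audio->data_f(i);  // int -> float conversion, if needed
        for (int j = 0; j < samples; ++j)
          ch[j] *= gain;  // work in float; no manual conversion required
      }
    }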

This patch also modifies the echo cancellation step to make use of the
new methods (it was already using floats internally; now it no longer
has to convert to and from ints).

(The reference data for the ApmTest.Process test had to be modified
slightly, because the echo canceller no longer unnecessarily converts
float data to int and immediately back to float on each iteration of
the loop in EchoCancellationImpl::ProcessCaptureAudio.)
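
A toy example of the rounding that the old roundtrip introduced (values
illustrative only): each float -> int -> float conversion discards the
fractional part of every sample, which is why removing the roundtrip perturbs
the reference output slightly.

    float sample = 123.75f;               // value on the float AEC path
    int16_t quantized = (int16_t)sample;  // 123: fraction lost in the int detour
    float restored = (float)quantized;    // 123.0f, not 123.75f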

BUG=
R=aluebs@webrtc.org, andrew@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/18399005

git-svn-id: http://webrtc.googlecode.com/svn/trunk@6138 4adac7df-926f-26a2-2b94-8c16560cd09d
Author: kwiberg@webrtc.org
Date:   2014-05-14 09:01:35 +00:00
Parent: 3d5cb33da4
Commit: 934a265a47

9 changed files with 180 additions and 95 deletions

--- a/webrtc/modules/audio_processing/aec/aec_core.c
+++ b/webrtc/modules/audio_processing/aec/aec_core.c
@@ -116,7 +116,7 @@ extern int webrtc_aec_instance_count;
 // "Private" function prototypes.
 static void ProcessBlock(AecCore* aec);
-static void NonLinearProcessing(AecCore* aec, short* output, short* outputH);
+static void NonLinearProcessing(AecCore* aec, float* output, float* outputH);
 static void GetHighbandGain(const float* lambda, float* nlpGainHband);
@@ -160,28 +160,28 @@ int WebRtcAec_CreateAec(AecCore** aecInst) {
     return -1;
   }
-  aec->nearFrBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(int16_t));
+  aec->nearFrBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(float));
   if (!aec->nearFrBuf) {
     WebRtcAec_FreeAec(aec);
     aec = NULL;
     return -1;
   }
-  aec->outFrBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(int16_t));
+  aec->outFrBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(float));
   if (!aec->outFrBuf) {
     WebRtcAec_FreeAec(aec);
     aec = NULL;
     return -1;
   }
-  aec->nearFrBufH = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(int16_t));
+  aec->nearFrBufH = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(float));
   if (!aec->nearFrBufH) {
     WebRtcAec_FreeAec(aec);
     aec = NULL;
     return -1;
   }
-  aec->outFrBufH = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(int16_t));
+  aec->outFrBufH = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(float));
   if (!aec->outFrBufH) {
     WebRtcAec_FreeAec(aec);
     aec = NULL;
@@ -617,11 +617,11 @@ int WebRtcAec_MoveFarReadPtr(AecCore* aec, int elements) {
 }
 
 void WebRtcAec_ProcessFrame(AecCore* aec,
-                            const short* nearend,
-                            const short* nearendH,
+                            const float* nearend,
+                            const float* nearendH,
                             int knownDelay,
-                            int16_t* out,
-                            int16_t* outH) {
+                            float* out,
+                            float* outH) {
   int out_elements = 0;
 
   // For each frame the process is as follows:
@@ -814,7 +814,7 @@ void WebRtcAec_SetSystemDelay(AecCore* self, int delay) {
 static void ProcessBlock(AecCore* aec) {
   int i;
-  float d[PART_LEN], y[PART_LEN], e[PART_LEN], dH[PART_LEN];
+  float y[PART_LEN], e[PART_LEN];
   float scale;
 
   float fft[PART_LEN2];
@@ -833,30 +833,22 @@ static void ProcessBlock(AecCore* aec) {
   const float ramp = 1.0002f;
   const float gInitNoise[2] = {0.999f, 0.001f};
 
-  int16_t nearend[PART_LEN];
-  int16_t* nearend_ptr = NULL;
-  int16_t output[PART_LEN];
-  int16_t outputH[PART_LEN];
+  float nearend[PART_LEN];
+  float* nearend_ptr = NULL;
+  float output[PART_LEN];
+  float outputH[PART_LEN];
 
   float* xf_ptr = NULL;
 
-  memset(dH, 0, sizeof(dH));
-
+  // Concatenate old and new nearend blocks.
   if (aec->sampFreq == 32000) {
     // Get the upper band first so we can reuse |nearend|.
     WebRtc_ReadBuffer(aec->nearFrBufH, (void**)&nearend_ptr, nearend, PART_LEN);
-    for (i = 0; i < PART_LEN; i++) {
-      dH[i] = (float)(nearend_ptr[i]);
-    }
-    memcpy(aec->dBufH + PART_LEN, dH, sizeof(float) * PART_LEN);
+    memcpy(aec->dBufH + PART_LEN, nearend_ptr, sizeof(nearend));
   }
   WebRtc_ReadBuffer(aec->nearFrBuf, (void**)&nearend_ptr, nearend, PART_LEN);
+  memcpy(aec->dBuf + PART_LEN, nearend_ptr, sizeof(nearend));
 
   // ---------- Ooura fft ----------
-  // Concatenate old and new nearend blocks.
-  for (i = 0; i < PART_LEN; i++) {
-    d[i] = (float)(nearend_ptr[i]);
-  }
-  memcpy(aec->dBuf + PART_LEN, d, sizeof(float) * PART_LEN);
 
 #ifdef WEBRTC_AEC_DEBUG_DUMP
   {
@@ -968,7 +960,7 @@ static void ProcessBlock(AecCore* aec) {
   }
 
   for (i = 0; i < PART_LEN; i++) {
-    e[i] = d[i] - y[i];
+    e[i] = nearend_ptr[i] - y[i];
   }
 
   // Error fft
@@ -1027,7 +1019,7 @@ static void ProcessBlock(AecCore* aec) {
 #endif
 }
 
-static void NonLinearProcessing(AecCore* aec, short* output, short* outputH) {
+static void NonLinearProcessing(AecCore* aec, float* output, float* outputH) {
   float efw[2][PART_LEN1], dfw[2][PART_LEN1], xfw[2][PART_LEN1];
   complex_t comfortNoiseHband[PART_LEN1];
   float fft[PART_LEN2];
@@ -1321,13 +1313,10 @@ static void NonLinearProcessing(AecCore* aec, short* output, short* outputH) {
     fft[i] *= scale;  // fft scaling
     fft[i] = fft[i] * sqrtHanning[i] + aec->outBuf[i];
 
-    // Saturation protection
-    output[i] = (short)WEBRTC_SPL_SAT(
-        WEBRTC_SPL_WORD16_MAX, fft[i], WEBRTC_SPL_WORD16_MIN);
-
     fft[PART_LEN + i] *= scale;  // fft scaling
     aec->outBuf[i] = fft[PART_LEN + i] * sqrtHanning[PART_LEN - i];
   }
+  memcpy(output, fft, sizeof(*output) * PART_LEN);
 
   // For H band
   if (aec->sampFreq == 32000) {
@@ -1351,8 +1340,8 @@ static void NonLinearProcessing(AecCore* aec, short* output, short* outputH) {
     // compute gain factor
     for (i = 0; i < PART_LEN; i++) {
-      dtmp = (float)aec->dBufH[i];
-      dtmp = (float)dtmp * nlpGainHband;  // for variable gain
+      dtmp = aec->dBufH[i];
+      dtmp = dtmp * nlpGainHband;  // for variable gain
 
       // add some comfort noise where Hband is attenuated
       if (flagHbandCn == 1) {
@@ -1360,9 +1349,7 @@ static void NonLinearProcessing(AecCore* aec, short* output, short* outputH) {
         dtmp += cnScaleHband * fft[i];
       }
 
-      // Saturation protection
-      outputH[i] = (short)WEBRTC_SPL_SAT(
-          WEBRTC_SPL_WORD16_MAX, dtmp, WEBRTC_SPL_WORD16_MIN);
+      outputH[i] = dtmp;
     }
   }

--- a/webrtc/modules/audio_processing/aec/aec_core.h
+++ b/webrtc/modules/audio_processing/aec/aec_core.h
@@ -60,11 +60,11 @@ void WebRtcAec_InitAec_mips(void);
 void WebRtcAec_BufferFarendPartition(AecCore* aec, const float* farend);
 
 void WebRtcAec_ProcessFrame(AecCore* aec,
-                            const short* nearend,
-                            const short* nearendH,
+                            const float* nearend,
+                            const float* nearendH,
                             int knownDelay,
-                            int16_t* out,
-                            int16_t* outH);
+                            float* out,
+                            float* outH);
 
 // A helper function to call WebRtc_MoveReadPtr() for all far-end buffers.
 // Returns the number of elements moved, and adjusts |system_delay| by the

--- a/webrtc/modules/audio_processing/aec/echo_cancellation.c
+++ b/webrtc/modules/audio_processing/aec/echo_cancellation.c
@@ -104,18 +104,18 @@ int webrtc_aec_instance_count = 0;
 static void EstBufDelayNormal(aecpc_t* aecInst);
 static void EstBufDelayExtended(aecpc_t* aecInst);
 static int ProcessNormal(aecpc_t* self,
-                         const int16_t* near,
-                         const int16_t* near_high,
-                         int16_t* out,
-                         int16_t* out_high,
+                         const float* near,
+                         const float* near_high,
+                         float* out,
+                         float* out_high,
                          int16_t num_samples,
                          int16_t reported_delay_ms,
                          int32_t skew);
 static void ProcessExtended(aecpc_t* self,
-                            const int16_t* near,
-                            const int16_t* near_high,
-                            int16_t* out,
-                            int16_t* out_high,
+                            const float* near,
+                            const float* near_high,
+                            float* out,
+                            float* out_high,
                             int16_t num_samples,
                             int16_t reported_delay_ms,
                             int32_t skew);
@@ -372,10 +372,10 @@ int32_t WebRtcAec_BufferFarend(void* aecInst,
 }
 
 int32_t WebRtcAec_Process(void* aecInst,
-                          const int16_t* nearend,
-                          const int16_t* nearendH,
-                          int16_t* out,
-                          int16_t* outH,
+                          const float* nearend,
+                          const float* nearendH,
+                          float* out,
+                          float* outH,
                           int16_t nrOfSamples,
                           int16_t msInSndCardBuf,
                           int32_t skew) {
@@ -632,10 +632,10 @@ AecCore* WebRtcAec_aec_core(void* handle) {
 }
 
 static int ProcessNormal(aecpc_t* aecpc,
-                         const int16_t* nearend,
-                         const int16_t* nearendH,
-                         int16_t* out,
-                         int16_t* outH,
+                         const float* nearend,
+                         const float* nearendH,
+                         float* out,
+                         float* outH,
                          int16_t nrOfSamples,
                          int16_t msInSndCardBuf,
                          int32_t skew) {
@@ -689,10 +689,10 @@ static int ProcessNormal(aecpc_t* aecpc,
   if (aecpc->startup_phase) {
     // Only needed if they don't already point to the same place.
     if (nearend != out) {
-      memcpy(out, nearend, sizeof(short) * nrOfSamples);
+      memcpy(out, nearend, sizeof(*out) * nrOfSamples);
     }
     if (nearendH != outH) {
-      memcpy(outH, nearendH, sizeof(short) * nrOfSamples);
+      memcpy(outH, nearendH, sizeof(*outH) * nrOfSamples);
     }
 
     // The AEC is in the start up mode
@@ -789,10 +789,10 @@ static int ProcessNormal(aecpc_t* aecpc,
 }
 
 static void ProcessExtended(aecpc_t* self,
-                            const int16_t* near,
-                            const int16_t* near_high,
-                            int16_t* out,
-                            int16_t* out_high,
+                            const float* near,
+                            const float* near_high,
+                            float* out,
+                            float* out_high,
                             int16_t num_samples,
                             int16_t reported_delay_ms,
                             int32_t skew) {
@@ -823,10 +823,10 @@ static void ProcessExtended(aecpc_t* self,
   if (!self->farend_started) {
     // Only needed if they don't already point to the same place.
     if (near != out) {
-      memcpy(out, near, sizeof(short) * num_samples);
+      memcpy(out, near, sizeof(*out) * num_samples);
     }
     if (near_high != out_high) {
-      memcpy(out_high, near_high, sizeof(short) * num_samples);
+      memcpy(out_high, near_high, sizeof(*out_high) * num_samples);
     }
     return;
   }

--- a/webrtc/modules/audio_processing/aec/include/echo_cancellation.h
+++ b/webrtc/modules/audio_processing/aec/include/echo_cancellation.h
@@ -133,9 +133,9 @@ int32_t WebRtcAec_BufferFarend(void* aecInst,
 * Inputs                       Description
 * -------------------------------------------------------------------
 * void* aecInst                Pointer to the AEC instance
-* int16_t* nearend             In buffer containing one frame of
+* float* nearend               In buffer containing one frame of
 *                              nearend+echo signal for L band
-* int16_t* nearendH            In buffer containing one frame of
+* float* nearendH              In buffer containing one frame of
 *                              nearend+echo signal for H band
 * int16_t nrOfSamples          Number of samples in nearend buffer
 * int16_t msInSndCardBuf       Delay estimate for sound card and
@@ -146,18 +146,18 @@ int32_t WebRtcAec_BufferFarend(void* aecInst,
 *
 * Outputs                      Description
 * -------------------------------------------------------------------
-* int16_t* out                 Out buffer, one frame of processed nearend
+* float* out                   Out buffer, one frame of processed nearend
 *                              for L band
-* int16_t* outH                Out buffer, one frame of processed nearend
+* float* outH                  Out buffer, one frame of processed nearend
 *                              for H band
 * int32_t return               0: OK
 *                              -1: error
 */
 int32_t WebRtcAec_Process(void* aecInst,
-                          const int16_t* nearend,
-                          const int16_t* nearendH,
-                          int16_t* out,
-                          int16_t* outH,
+                          const float* nearend,
+                          const float* nearendH,
+                          float* out,
+                          float* outH,
                           int16_t nrOfSamples,
                           int16_t msInSndCardBuf,
                           int32_t skew);

--- a/webrtc/modules/audio_processing/aec/system_delay_unittest.cc
+++ b/webrtc/modules/audio_processing/aec/system_delay_unittest.cc
@@ -46,16 +46,18 @@ class SystemDelayTest : public ::testing::Test {
   aecpc_t* self_;
   int samples_per_frame_;
   // Dummy input/output speech data.
-  int16_t far_[160];
-  int16_t near_[160];
-  int16_t out_[160];
+  static const int kSamplesPerChunk = 160;
+  int16_t far_[kSamplesPerChunk];
+  float near_[kSamplesPerChunk];
+  float out_[kSamplesPerChunk];
 };
 
 SystemDelayTest::SystemDelayTest()
    : handle_(NULL), self_(NULL), samples_per_frame_(0) {
   // Dummy input data are set with more or less arbitrary non-zero values.
   memset(far_, 1, sizeof(far_));
-  memset(near_, 2, sizeof(near_));
+  for (int i = 0; i < kSamplesPerChunk; i++)
+    near_[i] = 514.0;
   memset(out_, 0, sizeof(out_));
 }
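
(The value 514.0 above preserves the old test input exactly: memset with byte
value 2 writes 0x02 into both bytes of each int16_t, and 0x0202 == 514. A
quick sanity check of that equivalence:)

    int16_t x;
    memset(&x, 2, sizeof(x));  // both bytes become 0x02, on any endianness
    assert(x == 0x0202);       // 0x0202 == 514, hence near_[i] = 514.0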

--- a/webrtc/modules/audio_processing/audio_buffer.cc
+++ b/webrtc/modules/audio_processing/audio_buffer.cc
@@ -68,6 +68,64 @@ void StereoToMono(const int16_t* left, const int16_t* right, int16_t* out,
 }  // namespace
 
+// One int16_t and one float ChannelBuffer that are kept in sync. The sync is
+// broken when someone requests write access to either ChannelBuffer, and
+// reestablished when someone requests the outdated ChannelBuffer. It is
+// therefore safe to use the return value of ibuf() and fbuf() until the next
+// call to the other method.
+class IFChannelBuffer {
+ public:
+  IFChannelBuffer(int samples_per_channel, int num_channels)
+      : ivalid_(true),
+        ibuf_(samples_per_channel, num_channels),
+        fvalid_(true),
+        fbuf_(samples_per_channel, num_channels) {}
+
+  ChannelBuffer<int16_t>* ibuf() {
+    RefreshI();
+    fvalid_ = false;
+    return &ibuf_;
+  }
+
+  ChannelBuffer<float>* fbuf() {
+    RefreshF();
+    ivalid_ = false;
+    return &fbuf_;
+  }
+
+ private:
+  void RefreshF() {
+    if (!fvalid_) {
+      assert(ivalid_);
+      const int16_t* const int_data = ibuf_.data();
+      float* const float_data = fbuf_.data();
+      const int length = fbuf_.length();
+      for (int i = 0; i < length; ++i)
+        float_data[i] = int_data[i];
+      fvalid_ = true;
+    }
+  }
+
+  void RefreshI() {
+    if (!ivalid_) {
+      assert(fvalid_);
+      const float* const float_data = fbuf_.data();
+      int16_t* const int_data = ibuf_.data();
+      const int length = ibuf_.length();
+      for (int i = 0; i < length; ++i)
+        int_data[i] = WEBRTC_SPL_SAT(std::numeric_limits<int16_t>::max(),
+                                     float_data[i],
+                                     std::numeric_limits<int16_t>::min());
+      ivalid_ = true;
+    }
+  }
+
+  bool ivalid_;
+  ChannelBuffer<int16_t> ibuf_;
+  bool fvalid_;
+  ChannelBuffer<float> fbuf_;
+};
+
 class SplitChannelBuffer {
  public:
   SplitChannelBuffer(int samples_per_split_channel, int num_channels)
@@ -76,12 +134,14 @@ class SplitChannelBuffer {
   }
   ~SplitChannelBuffer() {}
 
-  int16_t* low_channel(int i) { return low_.channel(i); }
-  int16_t* high_channel(int i) { return high_.channel(i); }
+  int16_t* low_channel(int i) { return low_.ibuf()->channel(i); }
+  int16_t* high_channel(int i) { return high_.ibuf()->channel(i); }
+  float* low_channel_f(int i) { return low_.fbuf()->channel(i); }
+  float* high_channel_f(int i) { return high_.fbuf()->channel(i); }
 
 private:
-  ChannelBuffer<int16_t> low_;
-  ChannelBuffer<int16_t> high_;
+  IFChannelBuffer low_;
+  IFChannelBuffer high_;
 };
 
 AudioBuffer::AudioBuffer(int input_samples_per_channel,
@@ -102,8 +162,8 @@ AudioBuffer::AudioBuffer(int input_samples_per_channel,
       is_muted_(false),
       data_(NULL),
       keyboard_data_(NULL),
-      channels_(new ChannelBuffer<int16_t>(proc_samples_per_channel_,
-                                           num_proc_channels_)) {
+      channels_(new IFChannelBuffer(proc_samples_per_channel_,
+                                    num_proc_channels_)) {
   assert(input_samples_per_channel_ > 0);
   assert(proc_samples_per_channel_ > 0);
   assert(output_samples_per_channel_ > 0);
@@ -185,7 +245,7 @@ void AudioBuffer::CopyFrom(const float* const* data,
   // Convert to int16.
   for (int i = 0; i < num_proc_channels_; ++i) {
     ScaleAndRoundToInt16(data_ptr[i], proc_samples_per_channel_,
-                         channels_->channel(i));
+                         channels_->ibuf()->channel(i));
   }
 }
@@ -202,7 +262,9 @@ void AudioBuffer::CopyTo(int samples_per_channel,
     data_ptr = process_buffer_->channels();
   }
   for (int i = 0; i < num_proc_channels_; ++i) {
-    ScaleToFloat(channels_->channel(i), proc_samples_per_channel_, data_ptr[i]);
+    ScaleToFloat(channels_->ibuf()->channel(i),
+                 proc_samples_per_channel_,
+                 data_ptr[i]);
   }
 
   // Resample.
@@ -233,7 +295,7 @@ const int16_t* AudioBuffer::data(int channel) const {
     return data_;
   }
 
-  return channels_->channel(channel);
+  return channels_->ibuf()->channel(channel);
 }
 
 int16_t* AudioBuffer::data(int channel) {
@@ -241,6 +303,19 @@ int16_t* AudioBuffer::data(int channel) {
   return const_cast<int16_t*>(t->data(channel));
 }
 
+float* AudioBuffer::data_f(int channel) {
+  assert(channel >= 0 && channel < num_proc_channels_);
+  if (data_ != NULL) {
+    // Need to make a copy of the data instead of just pointing to it, since
+    // we're about to convert it to float.
+    assert(channel == 0 && num_proc_channels_ == 1);
+    memcpy(channels_->ibuf()->channel(0), data_,
+           sizeof(*data_) * proc_samples_per_channel_);
+    data_ = NULL;
+  }
+  return channels_->fbuf()->channel(channel);
+}
+
 const int16_t* AudioBuffer::low_pass_split_data(int channel) const {
   assert(channel >= 0 && channel < num_proc_channels_);
   if (split_channels_.get() == NULL) {
@@ -255,6 +330,12 @@ int16_t* AudioBuffer::low_pass_split_data(int channel) {
   return const_cast<int16_t*>(t->low_pass_split_data(channel));
 }
 
+float* AudioBuffer::low_pass_split_data_f(int channel) {
+  assert(channel >= 0 && channel < num_proc_channels_);
+  return split_channels_.get() ? split_channels_->low_channel_f(channel)
+                               : data_f(channel);
+}
+
 const int16_t* AudioBuffer::high_pass_split_data(int channel) const {
   assert(channel >= 0 && channel < num_proc_channels_);
   if (split_channels_.get() == NULL) {
@@ -269,6 +350,12 @@ int16_t* AudioBuffer::high_pass_split_data(int channel) {
   return const_cast<int16_t*>(t->high_pass_split_data(channel));
 }
 
+float* AudioBuffer::high_pass_split_data_f(int channel) {
+  assert(channel >= 0 && channel < num_proc_channels_);
+  return split_channels_.get() ? split_channels_->high_channel_f(channel)
+                               : NULL;
+}
+
 const int16_t* AudioBuffer::mixed_data(int channel) const {
   assert(channel >= 0 && channel < num_mixed_channels_);
@@ -348,7 +435,7 @@ void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
   int16_t* interleaved = frame->data_;
   for (int i = 0; i < num_proc_channels_; i++) {
-    int16_t* deinterleaved = channels_->channel(i);
+    int16_t* deinterleaved = channels_->ibuf()->channel(i);
     int interleaved_idx = i;
     for (int j = 0; j < proc_samples_per_channel_; j++) {
       deinterleaved[j] = interleaved[interleaved_idx];
@@ -368,14 +455,15 @@ void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) const {
     return;
   }
 
-  if (num_proc_channels_ == 1) {
   if (data_) {
+    assert(num_proc_channels_ == 1);
     assert(data_ == frame->data_);
     return;
   }
 
   int16_t* interleaved = frame->data_;
   for (int i = 0; i < num_proc_channels_; i++) {
-    int16_t* deinterleaved = channels_->channel(i);
+    int16_t* deinterleaved = channels_->ibuf()->channel(i);
     int interleaved_idx = i;
     for (int j = 0; j < proc_samples_per_channel_; j++) {
       interleaved[interleaved_idx] = deinterleaved[j];
@@ -394,8 +482,8 @@ void AudioBuffer::CopyAndMix(int num_mixed_channels) {
                                                num_mixed_channels));
   }
 
-  StereoToMono(channels_->channel(0),
-               channels_->channel(1),
+  StereoToMono(channels_->ibuf()->channel(0),
+               channels_->ibuf()->channel(1),
                mixed_channels_->channel(0),
                proc_samples_per_channel_);
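
A usage sketch of the lazy-sync contract documented above (values arbitrary;
the behavior follows directly from RefreshI()/RefreshF() in the class added
in this file):

    IFChannelBuffer buf(160, 1);             // 160 samples per channel, mono
    buf.ibuf()->channel(0)[0] = 1000;        // write through the int16_t view
    float* f = buf.fbuf()->channel(0);       // refreshes floats, marks ints stale
    assert(f[0] == 1000.0f);
    f[0] = 40000.0f;                         // outside the int16_t range
    int16_t i0 = buf.ibuf()->channel(0)[0];  // refresh saturates via WEBRTC_SPL_SAT
    assert(i0 == 32767);                     // clamped to the int16_t maximum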

--- a/webrtc/modules/audio_processing/audio_buffer.h
+++ b/webrtc/modules/audio_processing/audio_buffer.h
@@ -24,6 +24,7 @@ namespace webrtc {
 class PushSincResampler;
 class SplitChannelBuffer;
+class IFChannelBuffer;
 
 struct SplitFilterStates {
   SplitFilterStates() {
@@ -64,6 +65,13 @@ class AudioBuffer {
   const int16_t* mixed_data(int channel) const;
   const int16_t* mixed_low_pass_data(int channel) const;
   const int16_t* low_pass_reference(int channel) const;
+
+  // Float versions of the accessors, with automatic conversion back and forth
+  // as necessary. The range of the numbers are the same as for int16_t.
+  float* data_f(int channel);
+  float* low_pass_split_data_f(int channel);
+  float* high_pass_split_data_f(int channel);
+
   const float* keyboard_data() const;
 
   SplitFilterStates* filter_states(int channel);
@@ -114,7 +122,7 @@ class AudioBuffer {
   int16_t* data_;
   const float* keyboard_data_;
-  scoped_ptr<ChannelBuffer<int16_t> > channels_;
+  scoped_ptr<IFChannelBuffer> channels_;
   scoped_ptr<SplitChannelBuffer> split_channels_;
   scoped_ptr<SplitFilterStates[]> filter_states_;
   scoped_ptr<ChannelBuffer<int16_t> > mixed_channels_;

--- a/webrtc/modules/audio_processing/echo_cancellation_impl.cc
+++ b/webrtc/modules/audio_processing/echo_cancellation_impl.cc
@@ -129,10 +129,10 @@ int EchoCancellationImpl::ProcessCaptureAudio(AudioBuffer* audio) {
       Handle* my_handle = handle(handle_index);
       err = WebRtcAec_Process(
           my_handle,
-          audio->low_pass_split_data(i),
-          audio->high_pass_split_data(i),
-          audio->low_pass_split_data(i),
-          audio->high_pass_split_data(i),
+          audio->low_pass_split_data_f(i),
+          audio->high_pass_split_data_f(i),
+          audio->low_pass_split_data_f(i),
+          audio->high_pass_split_data_f(i),
          static_cast<int16_t>(audio->samples_per_split_channel()),
          apm_->stream_delay_ms(),
          stream_drift_samples_);
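
The upshot of this last hunk, sketched with abbreviated signatures (not the
literal declarations, which appear in full in the hunks above): conversion now
happens at most once, at the AudioBuffer boundary, instead of on every call
inside the echo canceller.

    // Before: the AEC took int16_t and converted to float and back each frame.
    //   WebRtcAec_Process(..., const int16_t* nearend, ..., int16_t* out, ...);
    // After: the AEC consumes and produces float directly; AudioBuffer converts
    // lazily, only when a later step asks for the int16_t view again.
    //   WebRtcAec_Process(..., const float* nearend, ..., float* out, ...);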