Refactor audio conversion functions.

Use a consistent naming scheme that can be understood at the callsite
without having to refer to documentation.

Remove hacks in AudioBuffer intended to maintain bit-exactness with the
float path. The conversions etc. are now all natural, and instead we
enforce close but not bit-exact output between the two paths.

Output of ApmTest.Process:
https://paste.googleplex.com/5931055831842816

R=aluebs@webrtc.org, bjornv@webrtc.org, kwiberg@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/13049004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@7561 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
andrew@webrtc.org 2014-10-30 03:40:10 +00:00
parent 776e6f289c
commit 4fc4addc81
11 changed files with 157 additions and 98 deletions

View File

@ -14,19 +14,29 @@
namespace webrtc { namespace webrtc {
void RoundToInt16(const float* src, size_t size, int16_t* dest) { void FloatToS16(const float* src, size_t size, int16_t* dest) {
for (size_t i = 0; i < size; ++i) for (size_t i = 0; i < size; ++i)
dest[i] = RoundToInt16(src[i]); dest[i] = FloatToS16(src[i]);
} }
void ScaleAndRoundToInt16(const float* src, size_t size, int16_t* dest) { void S16ToFloat(const int16_t* src, size_t size, float* dest) {
for (size_t i = 0; i < size; ++i) for (size_t i = 0; i < size; ++i)
dest[i] = ScaleAndRoundToInt16(src[i]); dest[i] = S16ToFloat(src[i]);
} }
void ScaleToFloat(const int16_t* src, size_t size, float* dest) { void FloatS16ToS16(const float* src, size_t size, int16_t* dest) {
for (size_t i = 0; i < size; ++i) for (size_t i = 0; i < size; ++i)
dest[i] = ScaleToFloat(src[i]); dest[i] = FloatS16ToS16(src[i]);
}
// Array form of FloatToFloatS16: applies the scalar conversion to each of the
// |size| samples in |src|, in order, and stores the results in |dest|.
void FloatToFloatS16(const float* src, size_t size, float* dest) {
  const float* const src_end = src + size;
  while (src != src_end) {
    *dest = FloatToFloatS16(*src);
    ++src;
    ++dest;
  }
}
void FloatS16ToFloat(const float* src, size_t size, float* dest) {
for (size_t i = 0; i < size; ++i)
dest[i] = FloatS16ToFloat(src[i]);
} }
} // namespace webrtc } // namespace webrtc

View File

@ -26,17 +26,7 @@ void ExpectArraysEq(const float* ref, const float* test, int length) {
} }
} }
TEST(AudioUtilTest, RoundToInt16) { TEST(AudioUtilTest, FloatToS16) {
const int kSize = 7;
const float kInput[kSize] = {
0.f, 0.4f, 0.5f, -0.4f, -0.5f, 32768.f, -32769.f};
const int16_t kReference[kSize] = {0, 0, 1, 0, -1, 32767, -32768};
int16_t output[kSize];
RoundToInt16(kInput, kSize, output);
ExpectArraysEq(kReference, output, kSize);
}
TEST(AudioUtilTest, ScaleAndRoundToInt16) {
const int kSize = 9; const int kSize = 9;
const float kInput[kSize] = { const float kInput[kSize] = {
0.f, 0.4f / 32767.f, 0.6f / 32767.f, -0.4f / 32768.f, -0.6f / 32768.f, 0.f, 0.4f / 32767.f, 0.6f / 32767.f, -0.4f / 32768.f, -0.6f / 32768.f,
@ -44,17 +34,51 @@ TEST(AudioUtilTest, ScaleAndRoundToInt16) {
const int16_t kReference[kSize] = { const int16_t kReference[kSize] = {
0, 0, 1, 0, -1, 32767, -32768, 32767, -32768}; 0, 0, 1, 0, -1, 32767, -32768, 32767, -32768};
int16_t output[kSize]; int16_t output[kSize];
ScaleAndRoundToInt16(kInput, kSize, output); FloatToS16(kInput, kSize, output);
ExpectArraysEq(kReference, output, kSize); ExpectArraysEq(kReference, output, kSize);
} }
TEST(AudioUtilTest, ScaleToFloat) { TEST(AudioUtilTest, S16ToFloat) {
const int kSize = 7; const int kSize = 7;
const int16_t kInput[kSize] = {0, 1, -1, 16384, -16384, 32767, -32768}; const int16_t kInput[kSize] = {0, 1, -1, 16384, -16384, 32767, -32768};
const float kReference[kSize] = { const float kReference[kSize] = {
0.f, 1.f / 32767.f, -1.f / 32768.f, 16384.f / 32767.f, -0.5f, 1.f, -1.f}; 0.f, 1.f / 32767.f, -1.f / 32768.f, 16384.f / 32767.f, -0.5f, 1.f, -1.f};
float output[kSize]; float output[kSize];
ScaleToFloat(kInput, kSize, output); S16ToFloat(kInput, kSize, output);
ExpectArraysEq(kReference, output, kSize);
}
// Exercises the array overload of FloatS16ToS16. The expected values below
// encode the contract being checked: fractions of one half round away from
// zero, and inputs beyond the int16 limits come back as the limits themselves.
TEST(AudioUtilTest, FloatS16ToS16) {
  const int kNumSamples = 7;
  const float kSamples[kNumSamples] = {
      0.f,
      0.4f, 0.5f,
      -0.4f, -0.5f,
      32768.f, -32769.f};
  const int16_t kExpected[kNumSamples] = {
      0,
      0, 1,
      0, -1,
      32767, -32768};
  int16_t converted[kNumSamples];
  FloatS16ToS16(kSamples, kNumSamples, converted);
  ExpectArraysEq(kExpected, converted, kNumSamples);
}
// Exercises the array overload of FloatToFloatS16. Per the expected values,
// positive inputs are scaled by 32767 and negative inputs by 32768, and inputs
// with magnitude above 1 are scaled the same way rather than clamped.
TEST(AudioUtilTest, FloatToFloatS16) {
  const int kNumSamples = 9;
  const float kSamples[kNumSamples] = {
      0.f,
      0.4f / 32767.f, 0.6f / 32767.f,
      -0.4f / 32768.f, -0.6f / 32768.f,
      1.f, -1.f,
      1.1f, -1.1f};
  const float kExpected[kNumSamples] = {
      0.f,
      0.4f, 0.6f,
      -0.4f, -0.6f,
      32767.f, -32768.f,
      36043.7f, -36044.8f};
  float converted[kNumSamples];
  FloatToFloatS16(kSamples, kNumSamples, converted);
  ExpectArraysEq(kExpected, converted, kNumSamples);
}
TEST(AudioUtilTest, FloatS16ToFloat) {
const int kSize = 9;
const float kInput[kSize] = {
0.f, 0.4f, 0.6f, -0.4f, -0.6f, 32767.f, -32768.f, 36043.7f, -36044.8f};
const float kReference[kSize] = {
0.f, 0.4f / 32767.f, 0.6f / 32767.f, -0.4f / 32768.f, -0.6f / 32768.f,
1.f, -1.f, 1.1f, -1.1f};
float output[kSize];
FloatS16ToFloat(kInput, kSize, output);
ExpectArraysEq(kReference, output, kSize); ExpectArraysEq(kReference, output, kSize);
} }

View File

@ -20,18 +20,11 @@ namespace webrtc {
typedef std::numeric_limits<int16_t> limits_int16; typedef std::numeric_limits<int16_t> limits_int16;
static inline int16_t RoundToInt16(float v) { // The conversion functions use the following naming convention:
const float kMaxRound = limits_int16::max() - 0.5f; // S16: int16_t [-32768, 32767]
const float kMinRound = limits_int16::min() + 0.5f; // Float: float [-1.0, 1.0]
if (v > 0) // FloatS16: float [-32768.0, 32767.0]
return v >= kMaxRound ? limits_int16::max() : static inline int16_t FloatToS16(float v) {
static_cast<int16_t>(v + 0.5f);
return v <= kMinRound ? limits_int16::min() :
static_cast<int16_t>(v - 0.5f);
}
// Scale (from [-1, 1]) and round to full-range int16 with clamping.
static inline int16_t ScaleAndRoundToInt16(float v) {
if (v > 0) if (v > 0)
return v >= 1 ? limits_int16::max() : return v >= 1 ? limits_int16::max() :
static_cast<int16_t>(v * limits_int16::max() + 0.5f); static_cast<int16_t>(v * limits_int16::max() + 0.5f);
@ -39,22 +32,37 @@ static inline int16_t ScaleAndRoundToInt16(float v) {
static_cast<int16_t>(-v * limits_int16::min() - 0.5f); static_cast<int16_t>(-v * limits_int16::min() - 0.5f);
} }
// Scale to float [-1, 1]. static inline float S16ToFloat(int16_t v) {
static inline float ScaleToFloat(int16_t v) { static const float kMaxInt16Inverse = 1.f / limits_int16::max();
const float kMaxInt16Inverse = 1.f / limits_int16::max(); static const float kMinInt16Inverse = 1.f / limits_int16::min();
const float kMinInt16Inverse = 1.f / limits_int16::min();
return v * (v > 0 ? kMaxInt16Inverse : -kMinInt16Inverse); return v * (v > 0 ? kMaxInt16Inverse : -kMinInt16Inverse);
} }
// Round |size| elements of |src| to int16 with clamping and write to |dest|. static inline int16_t FloatS16ToS16(float v) {
void RoundToInt16(const float* src, size_t size, int16_t* dest); static const float kMaxRound = limits_int16::max() - 0.5f;
static const float kMinRound = limits_int16::min() + 0.5f;
if (v > 0)
return v >= kMaxRound ? limits_int16::max() :
static_cast<int16_t>(v + 0.5f);
return v <= kMinRound ? limits_int16::min() :
static_cast<int16_t>(v - 0.5f);
}
// Scale (from [-1, 1]) and round |size| elements of |src| to full-range int16 static inline float FloatToFloatS16(float v) {
// with clamping and write to |dest|. return v > 0 ? v * limits_int16::max() : -v * limits_int16::min();
void ScaleAndRoundToInt16(const float* src, size_t size, int16_t* dest); }
// Scale |size| elements of |src| to float [-1, 1] and write to |dest|. static inline float FloatS16ToFloat(float v) {
void ScaleToFloat(const int16_t* src, size_t size, float* dest); static const float kMaxInt16Inverse = 1.f / limits_int16::max();
static const float kMinInt16Inverse = 1.f / limits_int16::min();
return v * (v > 0 ? kMaxInt16Inverse : -kMinInt16Inverse);
}
void FloatToS16(const float* src, size_t size, int16_t* dest);
void S16ToFloat(const int16_t* src, size_t size, float* dest);
void FloatS16ToS16(const float* src, size_t size, int16_t* dest);
void FloatToFloatS16(const float* src, size_t size, float* dest);
void FloatS16ToFloat(const float* src, size_t size, float* dest);
// Deinterleave audio from |interleaved| to the channel buffers pointed to // Deinterleave audio from |interleaved| to the channel buffers pointed to
// by |deinterleaved|. There must be sufficient space allocated in the // by |deinterleaved|. There must be sufficient space allocated in the

View File

@ -40,7 +40,7 @@ int PushSincResampler::Resample(const int16_t* source,
source_ptr_int_ = source; source_ptr_int_ = source;
// Pass NULL as the float source to have Run() read from the int16 source. // Pass NULL as the float source to have Run() read from the int16 source.
Resample(NULL, source_length, float_buffer_.get(), destination_frames_); Resample(NULL, source_length, float_buffer_.get(), destination_frames_);
RoundToInt16(float_buffer_.get(), destination_frames_, destination); FloatS16ToS16(float_buffer_.get(), destination_frames_, destination);
source_ptr_int_ = NULL; source_ptr_int_ = NULL;
return destination_frames_; return destination_frames_;
} }

View File

@ -160,16 +160,15 @@ void PushSincResamplerTest::ResampleTest(bool int_format) {
resampler_source.Run(input_samples, source.get()); resampler_source.Run(input_samples, source.get());
if (int_format) { if (int_format) {
for (int i = 0; i < kNumBlocks; ++i) { for (int i = 0; i < kNumBlocks; ++i) {
ScaleAndRoundToInt16( FloatToS16(&source[i * input_block_size], input_block_size,
&source[i * input_block_size], input_block_size, source_int.get()); source_int.get());
EXPECT_EQ(output_block_size, EXPECT_EQ(output_block_size,
resampler.Resample(source_int.get(), resampler.Resample(source_int.get(),
input_block_size, input_block_size,
destination_int.get(), destination_int.get(),
output_block_size)); output_block_size));
ScaleToFloat(destination_int.get(), S16ToFloat(destination_int.get(), output_block_size,
output_block_size, &resampled_destination[i * output_block_size]);
&resampled_destination[i * output_block_size]);
} }
} else { } else {
for (int i = 0; i < kNumBlocks; ++i) { for (int i = 0; i < kNumBlocks; ++i) {

View File

@ -68,7 +68,7 @@ void WavFile::WriteSamples(const float* samples, size_t num_samples) {
for (size_t i = 0; i < num_samples; i += kChunksize) { for (size_t i = 0; i < num_samples; i += kChunksize) {
int16_t isamples[kChunksize]; int16_t isamples[kChunksize];
const size_t chunk = std::min(kChunksize, num_samples - i); const size_t chunk = std::min(kChunksize, num_samples - i);
RoundToInt16(samples + i, chunk, isamples); FloatS16ToS16(samples + i, chunk, isamples);
WriteSamples(isamples, chunk); WriteSamples(isamples, chunk);
} }
} }

View File

@ -51,18 +51,11 @@ int KeyboardChannelIndex(AudioProcessing::ChannelLayout layout) {
return -1; return -1;
} }
void StereoToMono(const float* left, const float* right, float* out, template <typename T>
void StereoToMono(const T* left, const T* right, T* out,
int samples_per_channel) { int samples_per_channel) {
for (int i = 0; i < samples_per_channel; ++i) { for (int i = 0; i < samples_per_channel; ++i)
out[i] = (left[i] + right[i]) / 2; out[i] = (left[i] + right[i]) / 2;
}
}
void StereoToMono(const int16_t* left, const int16_t* right, int16_t* out,
int samples_per_channel) {
for (int i = 0; i < samples_per_channel; ++i) {
out[i] = (left[i] + right[i]) >> 1;
}
} }
} // namespace } // namespace
@ -114,13 +107,7 @@ class IFChannelBuffer {
void RefreshI() { void RefreshI() {
if (!ivalid_) { if (!ivalid_) {
assert(fvalid_); assert(fvalid_);
const float* const float_data = fbuf_.data(); FloatS16ToS16(fbuf_.data(), ibuf_.length(), ibuf_.data());
int16_t* const int_data = ibuf_.data();
const int length = ibuf_.length();
for (int i = 0; i < length; ++i)
int_data[i] = WEBRTC_SPL_SAT(std::numeric_limits<int16_t>::max(),
float_data[i],
std::numeric_limits<int16_t>::min());
ivalid_ = true; ivalid_ = true;
} }
} }
@ -230,8 +217,8 @@ void AudioBuffer::CopyFrom(const float* const* data,
// Convert to int16. // Convert to int16.
for (int i = 0; i < num_proc_channels_; ++i) { for (int i = 0; i < num_proc_channels_; ++i) {
ScaleAndRoundToInt16(data_ptr[i], proc_samples_per_channel_, FloatToFloatS16(data_ptr[i], proc_samples_per_channel_,
channels_->ibuf()->channel(i)); channels_->fbuf()->channel(i));
} }
} }
@ -248,9 +235,9 @@ void AudioBuffer::CopyTo(int samples_per_channel,
data_ptr = process_buffer_->channels(); data_ptr = process_buffer_->channels();
} }
for (int i = 0; i < num_proc_channels_; ++i) { for (int i = 0; i < num_proc_channels_; ++i) {
ScaleToFloat(channels_->ibuf()->channel(i), FloatS16ToFloat(channels_->fbuf()->channel(i),
proc_samples_per_channel_, proc_samples_per_channel_,
data_ptr[i]); data_ptr[i]);
} }
// Resample. // Resample.
@ -449,12 +436,7 @@ void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
// Downmix directly; no explicit deinterleaving needed. // Downmix directly; no explicit deinterleaving needed.
int16_t* downmixed = channels_->ibuf()->channel(0); int16_t* downmixed = channels_->ibuf()->channel(0);
for (int i = 0; i < input_samples_per_channel_; ++i) { for (int i = 0; i < input_samples_per_channel_; ++i) {
// HACK(ajm): The downmixing in the int16_t path is in practice never downmixed[i] = (frame->data_[i * 2] + frame->data_[i * 2 + 1]) / 2;
// called from production code. We do this weird scaling to and from float
// to satisfy tests checking for bit-exactness with the float path.
float downmix_float = (ScaleToFloat(frame->data_[i * 2]) +
ScaleToFloat(frame->data_[i * 2 + 1])) / 2;
downmixed[i] = ScaleAndRoundToInt16(downmix_float);
} }
} else { } else {
assert(num_proc_channels_ == num_input_channels_); assert(num_proc_channels_ == num_input_channels_);

View File

@ -66,9 +66,9 @@ void ConvertToFloat(const int16_t* int_data, ChannelBuffer<float>* cb) {
cb->samples_per_channel(), cb->samples_per_channel(),
cb->num_channels(), cb->num_channels(),
cb_int.channels()); cb_int.channels());
ScaleToFloat(cb_int.data(), S16ToFloat(cb_int.data(),
cb->samples_per_channel() * cb->num_channels(), cb->samples_per_channel() * cb->num_channels(),
cb->data()); cb->data());
} }
void ConvertToFloat(const AudioFrame& frame, ChannelBuffer<float>* cb) { void ConvertToFloat(const AudioFrame& frame, ChannelBuffer<float>* cb) {
@ -135,7 +135,7 @@ void SetFrameTo(AudioFrame* frame, int16_t left, int16_t right) {
void ScaleFrame(AudioFrame* frame, float scale) { void ScaleFrame(AudioFrame* frame, float scale) {
for (int i = 0; i < frame->samples_per_channel_ * frame->num_channels_; ++i) { for (int i = 0; i < frame->samples_per_channel_ * frame->num_channels_; ++i) {
frame->data_[i] = RoundToInt16(frame->data_[i] * scale); frame->data_[i] = FloatS16ToS16(frame->data_[i] * scale);
} }
} }
@ -1650,7 +1650,7 @@ TEST_F(ApmTest, DebugDumpFromFileHandle) {
#endif // WEBRTC_AUDIOPROC_DEBUG_DUMP #endif // WEBRTC_AUDIOPROC_DEBUG_DUMP
} }
TEST_F(ApmTest, FloatAndIntInterfacesGiveIdenticalResults) { TEST_F(ApmTest, FloatAndIntInterfacesGiveSimilarResults) {
audioproc::OutputData ref_data; audioproc::OutputData ref_data;
OpenFileAndReadMessage(ref_filename_, &ref_data); OpenFileAndReadMessage(ref_filename_, &ref_data);
@ -1679,7 +1679,8 @@ TEST_F(ApmTest, FloatAndIntInterfacesGiveIdenticalResults) {
Init(fapm.get()); Init(fapm.get());
ChannelBuffer<int16_t> output_cb(samples_per_channel, num_input_channels); ChannelBuffer<int16_t> output_cb(samples_per_channel, num_input_channels);
scoped_ptr<int16_t[]> output_int16(new int16_t[output_length]); ChannelBuffer<int16_t> output_int16(samples_per_channel,
num_input_channels);
int analog_level = 127; int analog_level = 127;
while (ReadFrame(far_file_, revframe_, revfloat_cb_.get()) && while (ReadFrame(far_file_, revframe_, revfloat_cb_.get()) &&
@ -1701,7 +1702,9 @@ TEST_F(ApmTest, FloatAndIntInterfacesGiveIdenticalResults) {
EXPECT_NOERR(fapm->gain_control()->set_stream_analog_level(analog_level)); EXPECT_NOERR(fapm->gain_control()->set_stream_analog_level(analog_level));
EXPECT_NOERR(apm_->ProcessStream(frame_)); EXPECT_NOERR(apm_->ProcessStream(frame_));
// TODO(ajm): Update to support different output rates. Deinterleave(frame_->data_, samples_per_channel, num_output_channels,
output_int16.channels());
EXPECT_NOERR(fapm->ProcessStream( EXPECT_NOERR(fapm->ProcessStream(
float_cb_->channels(), float_cb_->channels(),
samples_per_channel, samples_per_channel,
@ -1711,24 +1714,34 @@ TEST_F(ApmTest, FloatAndIntInterfacesGiveIdenticalResults) {
LayoutFromChannels(num_output_channels), LayoutFromChannels(num_output_channels),
float_cb_->channels())); float_cb_->channels()));
// Convert to interleaved int16. FloatToS16(float_cb_->data(), output_length, output_cb.data());
ScaleAndRoundToInt16(float_cb_->data(), output_length, output_cb.data()); for (int j = 0; j < num_output_channels; ++j) {
Interleave(output_cb.channels(), float variance = 0;
samples_per_channel, float snr = ComputeSNR(output_int16.channel(j), output_cb.channel(j),
num_output_channels, samples_per_channel, &variance);
output_int16.get()); #if defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
// Verify float and int16 paths produce identical output. // There are a few chunks in the fixed-point profile that give low SNR.
EXPECT_EQ(0, memcmp(frame_->data_, output_int16.get(), output_length)); // Listening confirmed the difference is acceptable.
const float kVarianceThreshold = 150;
const float kSNRThreshold = 10;
#else
const float kVarianceThreshold = 20;
const float kSNRThreshold = 20;
#endif
// Skip frames with low energy.
if (sqrt(variance) > kVarianceThreshold) {
EXPECT_LT(kSNRThreshold, snr);
}
}
analog_level = fapm->gain_control()->stream_analog_level(); analog_level = fapm->gain_control()->stream_analog_level();
EXPECT_EQ(apm_->gain_control()->stream_analog_level(), EXPECT_EQ(apm_->gain_control()->stream_analog_level(),
fapm->gain_control()->stream_analog_level()); fapm->gain_control()->stream_analog_level());
EXPECT_EQ(apm_->echo_cancellation()->stream_has_echo(), EXPECT_EQ(apm_->echo_cancellation()->stream_has_echo(),
fapm->echo_cancellation()->stream_has_echo()); fapm->echo_cancellation()->stream_has_echo());
EXPECT_EQ(apm_->voice_detection()->stream_has_voice(), EXPECT_NEAR(apm_->noise_suppression()->speech_probability(),
fapm->voice_detection()->stream_has_voice()); fapm->noise_suppression()->speech_probability(),
EXPECT_EQ(apm_->noise_suppression()->speech_probability(), 0.0005);
fapm->noise_suppression()->speech_probability());
// Reset in case of downmixing. // Reset in case of downmixing.
frame_->num_channels_ = test->num_input_channels(); frame_->num_channels_ = test->num_input_channels();
@ -2002,7 +2015,7 @@ bool ReadChunk(FILE* file, int16_t* int_data, float* float_data,
return false; // This is expected. return false; // This is expected.
} }
ScaleToFloat(int_data, frame_size, float_data); S16ToFloat(int_data, frame_size, float_data);
if (cb->num_channels() == 1) { if (cb->num_channels() == 1) {
MixStereoToMono(float_data, cb->data(), cb->samples_per_channel()); MixStereoToMono(float_data, cb->data(), cb->samples_per_channel());
} else { } else {

View File

@ -8,6 +8,7 @@
* be found in the AUTHORS file in the root of the source tree. * be found in the AUTHORS file in the root of the source tree.
*/ */
#include <math.h>
#include <limits> #include <limits>
#include "webrtc/audio_processing/debug.pb.h" #include "webrtc/audio_processing/debug.pb.h"
@ -153,4 +154,26 @@ static inline bool ReadMessageFromFile(FILE* file,
return msg->ParseFromArray(bytes.get(), size); return msg->ParseFromArray(bytes.get(), size);
} }
// Computes the signal-to-noise ratio, in dB, of |test| measured against |ref|
// over |length| samples.
//
// The "signal" power is the variance of |ref| (mean of squares minus the
// squared mean) and the "noise" power is the mean squared difference between
// |ref| and |test|. The variance of |ref| is also written to |variance| so
// callers can, for instance, skip low-energy frames.
//
// Returns 100 when the mean squared error is not positive (in particular when
// the two signals are identical).
template <typename T>
float ComputeSNR(const T* ref, const T* test, int length, float* variance) {
  float mse = 0;
  float mean = 0;
  *variance = 0;
  for (int i = 0; i < length; ++i) {
    // Widen to float before subtracting or squaring. Keeping the difference in
    // T (e.g. int16_t) would wrap whenever |ref[i] - test[i]| exceeds T's
    // range, under-reporting the error and inflating the SNR.
    const float ref_sample = static_cast<float>(ref[i]);
    const float error = ref_sample - static_cast<float>(test[i]);
    mse += error * error;
    *variance += ref_sample * ref_sample;
    mean += ref_sample;
  }
  mse /= length;
  *variance /= length;
  mean /= length;
  *variance -= mean * mean;

  float snr = 100;  // We assign 100 dB to the zero-error case.
  if (mse > 0)
    snr = 10 * log10(*variance / mse);
  return snr;
}
} // namespace webrtc } // namespace webrtc