Reduce cost of PushSincResampler::Resample().

Ideally, PushSincResampler would add very little overhead on top of
SincResampler. This change gets closer to that ideal.

Replace std::min/max and floor with inline functions. Add a benchmark
test to verify the improvement.

On a MacBook Retina, this results in PushSincResampler::Resample()
accounting for ~1% of CPU usage on voe_cmd_test vs the earlier ~2%
(with ISAC16 and 48 kHz audio devices).

Using the new benchmark, this results in a performance improvement of:
16 -> 44.1 : 1.7x
16 -> 48   : 1.9x
32 -> 44.1 : 1.6x
32 -> 48   : 1.7x
44.1 -> 16 : 1.5x
44.1 -> 32 : 1.7x
44.1 -> 48 : 1.7x
48 -> 16   : 1.5x
48 -> 32   : 1.5x
48 -> 44.1 : 1.8x

R=turaj@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/2157005

git-svn-id: http://webrtc.googlecode.com/svn/trunk@4695 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
andrew@webrtc.org
2013-09-06 21:15:55 +00:00
parent c7f708679d
commit b159c2e3dd
4 changed files with 85 additions and 7 deletions

View File

@@ -20,6 +20,20 @@ void ExpectArraysEq(const int16_t* ref, const int16_t* test, int length) {
}
}
// Checks that ClampInt16 leaves an in-range value alone and pins values just
// outside the int16_t range to the nearest limit.
TEST(AudioUtilTest, Clamp) {
  const float kInRange = 1000.f;
  const float kJustAboveMax = 32767.5f;
  const float kJustBelowMin = -32768.5f;

  // In-range input comes back unchanged.
  EXPECT_EQ(1000.f, ClampInt16(kInRange));
  // Out-of-range inputs saturate at the upper and lower limits.
  EXPECT_EQ(32767.f, ClampInt16(kJustAboveMax));
  EXPECT_EQ(-32768.f, ClampInt16(kJustBelowMin));
}
// Checks that RoundToInt16 rounds to the nearest integer, with exact halves
// going away from zero.
TEST(AudioUtilTest, Round) {
  const float kZero = 0.f;
  const float kBelowHalf = 0.4f;
  const float kExactlyHalf = 0.5f;

  EXPECT_EQ(0, RoundToInt16(kZero));
  // Positive side: below one half goes to 0, exactly one half goes to 1.
  EXPECT_EQ(0, RoundToInt16(kBelowHalf));
  EXPECT_EQ(1, RoundToInt16(kExactlyHalf));
  // Negative side mirrors the positive side.
  EXPECT_EQ(0, RoundToInt16(-kBelowHalf));
  EXPECT_EQ(-1, RoundToInt16(-kExactlyHalf));
}
TEST(AudioUtilTest, InterleavingStereo) {
const int16_t kInterleaved[] = {2, 3, 4, 9, 8, 27, 16, 81};
const int kSamplesPerChannel = 4;

View File

@@ -15,6 +15,20 @@
namespace webrtc {
// Saturate the floating-point |value| to [-32768, 32767], the range that an
// int16_t can represent. Values already inside the range are returned as is.
static inline float ClampInt16(float value) {
  const float kLowerLimit = -32768.f;
  const float kUpperLimit = 32767.f;
  if (value < kLowerLimit)
    return kLowerLimit;
  if (value > kUpperLimit)
    return kUpperLimit;
  // Neither comparison held, so the input is passed through untouched.
  return value;
}
// Round the floating-point |value| to the nearest integer and return it as an
// int16_t; exact halves round away from zero. No overflow handling is done
// here, so pass the input through ClampInt16 first when it may be out of
// range.
static inline int16_t RoundToInt16(float value) {
  // Shift by half a unit away from zero, then let the cast truncate.
  if (value < 0.f)
    return static_cast<int16_t>(value - 0.5f);
  return static_cast<int16_t>(value + 0.5f);
}
// Deinterleave audio from |interleaved| to the channel buffers pointed to
// by |deinterleaved|. There must be sufficient space allocated in the
// |deinterleaved| buffers (|num_channel| buffers with |samples_per_channel|

View File

@@ -8,13 +8,11 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/common_audio/include/audio_util.h"
#include "webrtc/common_audio/resampler/push_sinc_resampler.h"
#include <math.h>
#include <string.h>
#include <algorithm>
namespace webrtc {
PushSincResampler::PushSincResampler(int source_frames,
@@ -61,10 +59,8 @@ int PushSincResampler::Resample(const int16_t* source,
resampler_->Resample(resampler_->ChunkSize(), float_buffer_.get());
resampler_->Resample(destination_frames_, float_buffer_.get());
for (int i = 0; i < destination_frames_; ++i) {
float clipped = std::max(std::min(float_buffer_[i], 32767.0f), -32768.0f);
destination[i] = static_cast<int16_t>(floor(clipped + 0.5));
}
for (int i = 0; i < destination_frames_; ++i)
destination[i] = RoundToInt16(ClampInt16(float_buffer_[i]));
source_ptr_ = NULL;
return destination_frames_;
}

View File

@@ -15,6 +15,7 @@
#include "webrtc/common_audio/resampler/push_sinc_resampler.h"
#include "webrtc/common_audio/resampler/sinusoidal_linear_chirp_source.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
#include "webrtc/system_wrappers/interface/tick_util.h"
#include "webrtc/typedefs.h"
namespace webrtc {
@@ -39,6 +40,59 @@ class PushSincResamplerTest
double low_freq_error_;
};
// SincResamplerCallback implementation whose output is always zeroed memory.
// The benchmark below uses it so that producing input costs as little as
// possible.
class ZeroSource : public SincResamplerCallback {
 public:
  // Overwrites the first |frames| floats of |destination| with zero bytes.
  void Run(int frames, float* destination) {
    memset(destination, 0, sizeof(destination[0]) * frames);
  }
};
// Benchmark comparing PushSincResampler against a bare SincResampler for the
// parameterized input/output rate pair, printing the per-frame time of each
// and the relative overhead of the push wrapper.
//
// Disabled because it takes too long to run routinely. Use for performance
// benchmarking when needed.
TEST_P(PushSincResamplerTest, DISABLED_ResampleBenchmark) {
// One frame is 1/100 of the per-second rate (10 ms, given that the rates are
// printed as Hz below).
const int input_samples = input_rate_ / 100;
const int output_samples = output_rate_ / 100;
const int kResampleIterations = 200000;
// Source for data to be resampled.
ZeroSource resampler_source;
// Float buffers are used by the SincResampler run; the int16_t buffers are
// used by the PushSincResampler run.
scoped_array<float> resampled_destination(new float[output_samples]);
scoped_array<float> source(new float[input_samples]);
scoped_array<int16_t> source_int(new int16_t[input_samples]);
scoped_array<int16_t> destination_int(new int16_t[output_samples]);
// Fill the float input from the zero source, then scale and round it into the
// int16_t input that PushSincResampler consumes.
resampler_source.Run(input_samples, source.get());
for (int i = 0; i < input_samples; ++i) {
source_int[i] = static_cast<int16_t>(floor(32767 * source[i] + 0.5));
}
printf("Benchmarking %d iterations of %d Hz -> %d Hz:\n",
kResampleIterations, input_rate_, output_rate_);
// Baseline: time SincResampler on its own, constructed with
// |resampler_source| as its callback.
const double io_ratio = input_rate_ / static_cast<double>(output_rate_);
SincResampler sinc_resampler(io_ratio, SincResampler::kDefaultRequestSize,
&resampler_source);
TickTime start = TickTime::Now();
for (int i = 0; i < kResampleIterations; ++i) {
sinc_resampler.Resample(output_samples, resampled_destination.get());
}
double total_time_sinc_us = (TickTime::Now() - start).Microseconds();
printf("SincResampler took %.2f us per frame.\n",
total_time_sinc_us / kResampleIterations);
// Measurement under test: the same number of frames pushed through
// PushSincResampler, checking that every call reports a full output frame.
PushSincResampler resampler(input_samples, output_samples);
start = TickTime::Now();
for (int i = 0; i < kResampleIterations; ++i) {
EXPECT_EQ(output_samples,
resampler.Resample(source_int.get(), input_samples,
destination_int.get(), output_samples));
}
double total_time_us = (TickTime::Now() - start).Microseconds();
// Overhead is the extra time relative to the SincResampler baseline, as a
// percentage.
printf("PushSincResampler took %.2f us per frame; which is a %.1f%% overhead "
"on SincResampler.\n\n", total_time_us / kResampleIterations,
(total_time_us - total_time_sinc_us) / total_time_sinc_us * 100);
}
// Tests resampling using a given input and output sample rate.
TEST_P(PushSincResamplerTest, Resample) {
// Make comparisons using one second of data.