Downstream latest Chromium SincResampler changes.
Replace the BlockSize() workaround we were using previously to support the push wrapper with the upstream request_frames interface. This requires a bit of a trick to ensure we don't add more delay than necessary. On the first pass we use a dummy Resample() call in order to prime the buffer such that all later calls only require a single input request through Run(). Notably, this brings in an optimized loop condition, improving performance by ~2% - 3% on tested platforms and avoiding a 20% performance hit with clang. This addresses issue 2041. There are only negligible changes to the PushSincResamplerTest SNR thresholds, due to a fractional sample adjustment in output delay. This still retains the per-instance CPU detection, as webrtc lacks a LazyInstance helper for static initialization. Ideally, we would adopt SetRatio() in PushSincResampler's InitializeIfNeeded() for on-the-fly changes, but this will require a way to update request_frames. The diff against Chromium upstream is available here: https://codereview.chromium.org/19470003 BUG=2041 TESTED=unit tests, voe_cmd_test in loopback running through all codecs with 44.1 kHz and 48 kHz device formats using a stereo mic. R=dalecurtis@chromium.org Review URL: https://webrtc-codereview.appspot.com/1838004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@4406 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
@@ -38,15 +38,13 @@ int PushResampler::InitializeIfNeeded(int src_sample_rate_hz,
|
|||||||
int num_channels) {
|
int num_channels) {
|
||||||
if (src_sample_rate_hz == src_sample_rate_hz_ &&
|
if (src_sample_rate_hz == src_sample_rate_hz_ &&
|
||||||
dst_sample_rate_hz == dst_sample_rate_hz_ &&
|
dst_sample_rate_hz == dst_sample_rate_hz_ &&
|
||||||
num_channels == num_channels_) {
|
num_channels == num_channels_)
|
||||||
// No-op if settings haven't changed.
|
// No-op if settings haven't changed.
|
||||||
return 0;
|
return 0;
|
||||||
}
|
|
||||||
|
|
||||||
if (src_sample_rate_hz <= 0 || dst_sample_rate_hz <= 0 ||
|
if (src_sample_rate_hz <= 0 || dst_sample_rate_hz <= 0 ||
|
||||||
num_channels <= 0 || num_channels > 2) {
|
num_channels <= 0 || num_channels > 2)
|
||||||
return -1;
|
return -1;
|
||||||
}
|
|
||||||
|
|
||||||
src_sample_rate_hz_ = src_sample_rate_hz;
|
src_sample_rate_hz_ = src_sample_rate_hz;
|
||||||
dst_sample_rate_hz_ = dst_sample_rate_hz;
|
dst_sample_rate_hz_ = dst_sample_rate_hz;
|
||||||
@@ -72,9 +70,8 @@ int PushResampler::Resample(const int16_t* src, int src_length,
|
|||||||
int16_t* dst, int dst_capacity) {
|
int16_t* dst, int dst_capacity) {
|
||||||
const int src_size_10ms = src_sample_rate_hz_ * num_channels_ / 100;
|
const int src_size_10ms = src_sample_rate_hz_ * num_channels_ / 100;
|
||||||
const int dst_size_10ms = dst_sample_rate_hz_ * num_channels_ / 100;
|
const int dst_size_10ms = dst_sample_rate_hz_ * num_channels_ / 100;
|
||||||
if (src_length != src_size_10ms || dst_capacity < dst_size_10ms) {
|
if (src_length != src_size_10ms || dst_capacity < dst_size_10ms)
|
||||||
return -1;
|
return -1;
|
||||||
}
|
|
||||||
|
|
||||||
if (src_sample_rate_hz_ == dst_sample_rate_hz_) {
|
if (src_sample_rate_hz_ == dst_sample_rate_hz_) {
|
||||||
// The old resampler provides this memcpy facility in the case of matching
|
// The old resampler provides this memcpy facility in the case of matching
|
||||||
|
@@ -11,20 +11,22 @@
|
|||||||
#include "webrtc/common_audio/resampler/push_sinc_resampler.h"
|
#include "webrtc/common_audio/resampler/push_sinc_resampler.h"
|
||||||
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
#include <cstring>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
|
||||||
namespace webrtc {
|
namespace webrtc {
|
||||||
|
|
||||||
PushSincResampler::PushSincResampler(int src_block_size,
|
PushSincResampler::PushSincResampler(int source_frames,
|
||||||
int dst_block_size)
|
int destination_frames)
|
||||||
: resampler_(NULL),
|
: resampler_(NULL),
|
||||||
float_buffer_(NULL),
|
float_buffer_(NULL),
|
||||||
source_ptr_(NULL),
|
source_ptr_(NULL),
|
||||||
dst_size_(dst_block_size) {
|
destination_frames_(destination_frames),
|
||||||
resampler_.reset(new SincResampler(src_block_size * 1.0 / dst_block_size,
|
first_pass_(true),
|
||||||
this, src_block_size));
|
source_available_(0) {
|
||||||
float_buffer_.reset(new float[dst_block_size]);
|
resampler_.reset(new SincResampler(source_frames * 1.0 / destination_frames,
|
||||||
|
source_frames, this));
|
||||||
|
float_buffer_.reset(new float[destination_frames]);
|
||||||
}
|
}
|
||||||
|
|
||||||
PushSincResampler::~PushSincResampler() {
|
PushSincResampler::~PushSincResampler() {
|
||||||
@@ -34,32 +36,53 @@ int PushSincResampler::Resample(const int16_t* source,
|
|||||||
int source_length,
|
int source_length,
|
||||||
int16_t* destination,
|
int16_t* destination,
|
||||||
int destination_capacity) {
|
int destination_capacity) {
|
||||||
assert(source_length == resampler_->BlockSize());
|
assert(source_length == resampler_->request_frames());
|
||||||
assert(destination_capacity >= dst_size_);
|
assert(destination_capacity >= destination_frames_);
|
||||||
// Cache the source pointer. Calling Resample() will immediately trigger
|
// Cache the source pointer. Calling Resample() will immediately trigger
|
||||||
// the Run() callback whereupon we provide the cached value.
|
// the Run() callback whereupon we provide the cached value.
|
||||||
source_ptr_ = source;
|
source_ptr_ = source;
|
||||||
resampler_->Resample(float_buffer_.get(), dst_size_);
|
source_available_ = source_length;
|
||||||
for (int i = 0; i < dst_size_; ++i) {
|
|
||||||
|
// On the first pass, we call Resample() twice. During the first call, we
|
||||||
|
// provide dummy input and discard the output. This is done to prime the
|
||||||
|
// SincResampler buffer with the correct delay (half the kernel size), thereby
|
||||||
|
// ensuring that all later Resample() calls will only result in one input
|
||||||
|
// request through Run().
|
||||||
|
//
|
||||||
|
// If this wasn't done, SincResampler would call Run() twice on the first
|
||||||
|
// pass, and we'd have to introduce an entire |source_frames| of delay, rather
|
||||||
|
// than the minimum half kernel.
|
||||||
|
//
|
||||||
|
// It works out that ChunkSize() is exactly the amount of output we need to
|
||||||
|
// request in order to prime the buffer with a single Run() request for
|
||||||
|
// |source_frames|.
|
||||||
|
if (first_pass_)
|
||||||
|
resampler_->Resample(resampler_->ChunkSize(), float_buffer_.get());
|
||||||
|
|
||||||
|
resampler_->Resample(destination_frames_, float_buffer_.get());
|
||||||
|
for (int i = 0; i < destination_frames_; ++i) {
|
||||||
float clipped = std::max(std::min(float_buffer_[i], 32767.0f), -32768.0f);
|
float clipped = std::max(std::min(float_buffer_[i], 32767.0f), -32768.0f);
|
||||||
destination[i] = static_cast<int16_t>(std::floor(clipped + 0.5));
|
destination[i] = static_cast<int16_t>(std::floor(clipped + 0.5));
|
||||||
}
|
}
|
||||||
source_ptr_ = NULL;
|
source_ptr_ = NULL;
|
||||||
return dst_size_;
|
return destination_frames_;
|
||||||
}
|
}
|
||||||
|
|
||||||
void PushSincResampler::Run(float* destination, int frames) {
|
void PushSincResampler::Run(int frames, float* destination) {
|
||||||
assert(source_ptr_ != NULL);
|
assert(source_ptr_ != NULL);
|
||||||
assert(frames >= resampler_->BlockSize());
|
// Ensure we are only asked for the available samples. This would fail if
|
||||||
// We will have exactly |BlockSize| number of source samples available. If
|
// Run() was triggered more than once per Resample() call.
|
||||||
// the resampler asks for more, zero pad the beginning. This will only happen
|
assert(source_available_ == frames);
|
||||||
// on the first call while priming the buffer.
|
|
||||||
int i = 0;
|
if (first_pass_) {
|
||||||
for (; i < frames - resampler_->BlockSize(); ++i) {
|
// Provide dummy input on the first pass, the output of which will be
|
||||||
destination[i] = 0;
|
// discarded, as described in Resample().
|
||||||
}
|
memset(destination, 0, frames * sizeof(float));
|
||||||
for (int j = 0; i < frames; ++i, ++j) {
|
first_pass_ = false;
|
||||||
destination[i] = static_cast<float>(source_ptr_[j]);
|
} else {
|
||||||
|
for (int i = 0; i < frames; ++i)
|
||||||
|
destination[i] = static_cast<float>(source_ptr_[i]);
|
||||||
|
source_available_ -= frames;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -25,25 +25,33 @@ class PushSincResampler : public SincResamplerCallback {
|
|||||||
// Provide the size of the source and destination blocks in samples. These
|
// Provide the size of the source and destination blocks in samples. These
|
||||||
// must correspond to the same time duration (typically 10 ms) as the sample
|
// must correspond to the same time duration (typically 10 ms) as the sample
|
||||||
// ratio is inferred from them.
|
// ratio is inferred from them.
|
||||||
PushSincResampler(int src_block_size, int dst_block_size);
|
PushSincResampler(int source_frames, int destination_frames);
|
||||||
virtual ~PushSincResampler();
|
virtual ~PushSincResampler();
|
||||||
|
|
||||||
// Perform the resampling. |source_length| must always equal the
|
// Perform the resampling. |source_frames| must always equal the
|
||||||
// |src_block_size| provided at construction. |destination_capacity| must be
|
// |source_frames| provided at construction. |destination_capacity| must be
|
||||||
// at least as large as |dst_block_size|. Returns the number of samples
|
// at least as large as |destination_frames|. Returns the number of samples
|
||||||
// provided in destination (for convenience, since this will always be equal
|
// provided in destination (for convenience, since this will always be equal
|
||||||
// to |dst_block_size|).
|
// to |destination_frames|).
|
||||||
int Resample(const int16_t* source, int source_length,
|
int Resample(const int16_t* source, int source_frames,
|
||||||
int16_t* destination, int destination_capacity);
|
int16_t* destination, int destination_capacity);
|
||||||
|
|
||||||
// Implements SincResamplerCallback.
|
// Implements SincResamplerCallback.
|
||||||
virtual void Run(float* destination, int frames);
|
virtual void Run(int frames, float* destination);
|
||||||
|
|
||||||
|
SincResampler* get_resampler_for_testing() { return resampler_.get(); }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
scoped_ptr<SincResampler> resampler_;
|
scoped_ptr<SincResampler> resampler_;
|
||||||
scoped_array<float> float_buffer_;
|
scoped_array<float> float_buffer_;
|
||||||
const int16_t* source_ptr_;
|
const int16_t* source_ptr_;
|
||||||
const int dst_size_;
|
const int destination_frames_;
|
||||||
|
|
||||||
|
// True on the first call to Resample(), to prime the SincResampler buffer.
|
||||||
|
bool first_pass_;
|
||||||
|
|
||||||
|
// Used to assert we are only requested for as much data as is available.
|
||||||
|
int source_available_;
|
||||||
|
|
||||||
DISALLOW_COPY_AND_ASSIGN(PushSincResampler);
|
DISALLOW_COPY_AND_ASSIGN(PushSincResampler);
|
||||||
};
|
};
|
||||||
|
@@ -67,10 +67,19 @@ TEST_P(PushSincResamplerTest, Resample) {
|
|||||||
scoped_array<int16_t> source_int(new int16_t[input_block_size]);
|
scoped_array<int16_t> source_int(new int16_t[input_block_size]);
|
||||||
scoped_array<int16_t> destination_int(new int16_t[output_block_size]);
|
scoped_array<int16_t> destination_int(new int16_t[output_block_size]);
|
||||||
|
|
||||||
|
// The sinc resampler has an implicit delay of approximately half the kernel
|
||||||
|
// size at the input sample rate. By moving to a push model, this delay
|
||||||
|
// becomes explicit and is managed by zero-stuffing in PushSincResampler. We
|
||||||
|
// deal with it in the test by delaying the "pure" source to match. It must be
|
||||||
|
// checked before the first call to Resample(), because ChunkSize() will
|
||||||
|
// change afterwards.
|
||||||
|
const int output_delay_samples = output_block_size -
|
||||||
|
resampler.get_resampler_for_testing()->ChunkSize();
|
||||||
|
|
||||||
// Generate resampled signal.
|
// Generate resampled signal.
|
||||||
// With the PushSincResampler, we produce the signal block-by-10ms-block
|
// With the PushSincResampler, we produce the signal block-by-10ms-block
|
||||||
// rather than in a single pass, to exercise how it will be used in WebRTC.
|
// rather than in a single pass, to exercise how it will be used in WebRTC.
|
||||||
resampler_source.Run(source.get(), input_samples);
|
resampler_source.Run(input_samples, source.get());
|
||||||
for (int i = 0; i < kNumBlocks; ++i) {
|
for (int i = 0; i < kNumBlocks; ++i) {
|
||||||
for (int j = 0; j < input_block_size; ++j) {
|
for (int j = 0; j < input_block_size; ++j) {
|
||||||
source_int[j] = static_cast<int16_t>(std::floor(32767 *
|
source_int[j] = static_cast<int16_t>(std::floor(32767 *
|
||||||
@@ -86,17 +95,9 @@ TEST_P(PushSincResamplerTest, Resample) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Generate pure signal.
|
// Generate pure signal.
|
||||||
// The sinc resampler has an implicit delay of half the kernel size (32) at
|
|
||||||
// the input sample rate. By moving to a push model, this delay becomes
|
|
||||||
// explicit and is managed by zero-stuffing in PushSincResampler. This delay
|
|
||||||
// can be a fractional sample amount, so we deal with it in the test by
|
|
||||||
// delaying the "pure" source to match.
|
|
||||||
static const int kInputKernelDelaySamples = 16;
|
|
||||||
double output_delay_samples = static_cast<double>(output_rate_)
|
|
||||||
/ input_rate_ * kInputKernelDelaySamples;
|
|
||||||
SinusoidalLinearChirpSource pure_source(
|
SinusoidalLinearChirpSource pure_source(
|
||||||
output_rate_, output_samples, input_nyquist_freq, output_delay_samples);
|
output_rate_, output_samples, input_nyquist_freq, output_delay_samples);
|
||||||
pure_source.Run(pure_destination.get(), output_samples);
|
pure_source.Run(output_samples, pure_destination.get());
|
||||||
|
|
||||||
// Range of the Nyquist frequency (0.5 * min(input rate, output_rate)) which
|
// Range of the Nyquist frequency (0.5 * min(input rate, output_rate)) which
|
||||||
// we refer to as low and high.
|
// we refer to as low and high.
|
||||||
@@ -216,17 +217,17 @@ INSTANTIATE_TEST_CASE_P(
|
|||||||
std::tr1::make_tuple(8000, 16000, kResamplingRMSError, -70.30),
|
std::tr1::make_tuple(8000, 16000, kResamplingRMSError, -70.30),
|
||||||
std::tr1::make_tuple(16000, 16000, kResamplingRMSError, -75.51),
|
std::tr1::make_tuple(16000, 16000, kResamplingRMSError, -75.51),
|
||||||
std::tr1::make_tuple(32000, 16000, -18.48, -28.59),
|
std::tr1::make_tuple(32000, 16000, -18.48, -28.59),
|
||||||
std::tr1::make_tuple(44100, 16000, -19.59, -19.77),
|
std::tr1::make_tuple(44100, 16000, -19.30, -19.67),
|
||||||
std::tr1::make_tuple(48000, 16000, -20.01, -18.11),
|
std::tr1::make_tuple(48000, 16000, -19.81, -18.11),
|
||||||
std::tr1::make_tuple(96000, 16000, -20.95, -10.99),
|
std::tr1::make_tuple(96000, 16000, -20.95, -10.96),
|
||||||
|
|
||||||
// To 32 kHz
|
// To 32 kHz
|
||||||
std::tr1::make_tuple(8000, 32000, kResamplingRMSError, -70.30),
|
std::tr1::make_tuple(8000, 32000, kResamplingRMSError, -70.30),
|
||||||
std::tr1::make_tuple(16000, 32000, kResamplingRMSError, -75.51),
|
std::tr1::make_tuple(16000, 32000, kResamplingRMSError, -75.51),
|
||||||
std::tr1::make_tuple(32000, 32000, kResamplingRMSError, -75.56),
|
std::tr1::make_tuple(32000, 32000, kResamplingRMSError, -75.56),
|
||||||
std::tr1::make_tuple(44100, 32000, -16.52, -51.10),
|
std::tr1::make_tuple(44100, 32000, -16.44, -51.10),
|
||||||
std::tr1::make_tuple(48000, 32000, -16.90, -44.17),
|
std::tr1::make_tuple(48000, 32000, -16.90, -44.03),
|
||||||
std::tr1::make_tuple(96000, 32000, -19.80, -18.05),
|
std::tr1::make_tuple(96000, 32000, -19.61, -18.04),
|
||||||
std::tr1::make_tuple(192000, 32000, -21.02, -10.94)));
|
std::tr1::make_tuple(192000, 32000, -21.02, -10.94)));
|
||||||
|
|
||||||
} // namespace webrtc
|
} // namespace webrtc
|
||||||
|
@@ -11,31 +11,73 @@
|
|||||||
// Modified from the Chromium original:
|
// Modified from the Chromium original:
|
||||||
// src/media/base/sinc_resampler.cc
|
// src/media/base/sinc_resampler.cc
|
||||||
|
|
||||||
// Input buffer layout, dividing the total buffer into regions (r0_ - r5_):
|
// Initial input buffer layout, dividing into regions r0_ to r4_ (note: r0_, r3_
|
||||||
|
// and r4_ will move after the first load):
|
||||||
//
|
//
|
||||||
// |----------------|-----------------------------------------|----------------|
|
// |----------------|-----------------------------------------|----------------|
|
||||||
//
|
//
|
||||||
// kBlockSize + kKernelSize / 2
|
// request_frames_
|
||||||
// <--------------------------------------------------------->
|
// <--------------------------------------------------------->
|
||||||
// r0_
|
// r0_ (during first load)
|
||||||
//
|
//
|
||||||
// kKernelSize / 2 kKernelSize / 2 kKernelSize / 2 kKernelSize / 2
|
// kKernelSize / 2 kKernelSize / 2 kKernelSize / 2 kKernelSize / 2
|
||||||
// <---------------> <---------------> <---------------> <--------------->
|
// <---------------> <---------------> <---------------> <--------------->
|
||||||
// r1_ r2_ r3_ r4_
|
// r1_ r2_ r3_ r4_
|
||||||
//
|
//
|
||||||
// kBlockSize
|
// block_size_ == r4_ - r2_
|
||||||
// <--------------------------------------->
|
// <--------------------------------------->
|
||||||
// r5_
|
//
|
||||||
|
// request_frames_
|
||||||
|
// <------------------ ... ----------------->
|
||||||
|
// r0_ (during second load)
|
||||||
|
//
|
||||||
|
// On the second request r0_ slides to the right by kKernelSize / 2 and r3_, r4_
|
||||||
|
// and block_size_ are reinitialized via step (3) in the algorithm below.
|
||||||
|
//
|
||||||
|
// These new regions remain constant until a Flush() occurs. While complicated,
|
||||||
|
// this allows us to reduce jitter by always requesting the same amount from the
|
||||||
|
// provided callback.
|
||||||
//
|
//
|
||||||
// The algorithm:
|
// The algorithm:
|
||||||
//
|
//
|
||||||
// 1) Consume input frames into r0_ (r1_ is zero-initialized).
|
// 1) Allocate input_buffer of size: request_frames_ + kKernelSize; this ensures
|
||||||
// 2) Position kernel centered at start of r0_ (r2_) and generate output frames
|
// there's enough room to read request_frames_ from the callback into region
|
||||||
// until kernel is centered at start of r4_ or we've finished generating all
|
// r0_ (which will move between the first and subsequent passes).
|
||||||
// the output frames.
|
//
|
||||||
// 3) Copy r3_ to r1_ and r4_ to r2_.
|
// 2) Let r1_, r2_ each represent half the kernel centered around r0_:
|
||||||
// 4) Consume input frames into r5_ (zero-pad if we run out of input).
|
//
|
||||||
// 5) Goto (2) until all of input is consumed.
|
// r0_ = input_buffer_ + kKernelSize / 2
|
||||||
|
// r1_ = input_buffer_
|
||||||
|
// r2_ = r0_
|
||||||
|
//
|
||||||
|
// r0_ is always request_frames_ in size. r1_, r2_ are kKernelSize / 2 in
|
||||||
|
// size. r1_ must be zero initialized to avoid convolution with garbage (see
|
||||||
|
// step (5) for why).
|
||||||
|
//
|
||||||
|
// 3) Let r3_, r4_ each represent half the kernel right aligned with the end of
|
||||||
|
// r0_ and choose block_size_ as the distance in frames between r4_ and r2_:
|
||||||
|
//
|
||||||
|
// r3_ = r0_ + request_frames_ - kKernelSize
|
||||||
|
// r4_ = r0_ + request_frames_ - kKernelSize / 2
|
||||||
|
// block_size_ = r4_ - r2_ = request_frames_ - kKernelSize / 2
|
||||||
|
//
|
||||||
|
// 4) Consume request_frames_ frames into r0_.
|
||||||
|
//
|
||||||
|
// 5) Position kernel centered at start of r2_ and generate output frames until
|
||||||
|
// the kernel is centered at the start of r4_ or we've finished generating
|
||||||
|
// all the output frames.
|
||||||
|
//
|
||||||
|
// 6) Wrap leftover data from r3_ to r1_ and from r4_ to r2_.
|
||||||
|
//
|
||||||
|
// 7) If we're on the second load, in order to avoid overwriting the frames we
|
||||||
|
// just wrapped from r4_ we need to slide r0_ to the right by the size of
|
||||||
|
// r4_, which is kKernelSize / 2:
|
||||||
|
//
|
||||||
|
// r0_ = r0_ + kKernelSize / 2 = input_buffer_ + kKernelSize
|
||||||
|
//
|
||||||
|
// r3_, r4_, and block_size_ then need to be reinitialized, so goto (3).
|
||||||
|
//
|
||||||
|
// 8) Else, if we're not on the second load, goto (4).
|
||||||
//
|
//
|
||||||
// Note: we're glossing over how the sub-sample handling works with
|
// Note: we're glossing over how the sub-sample handling works with
|
||||||
// |virtual_source_idx_|, etc.
|
// |virtual_source_idx_|, etc.
|
||||||
@@ -70,49 +112,49 @@ static double SincScaleFactor(double io_ratio) {
|
|||||||
return sinc_scale_factor;
|
return sinc_scale_factor;
|
||||||
}
|
}
|
||||||
|
|
||||||
SincResampler::SincResampler(double io_sample_rate_ratio,
|
// If we know the minimum architecture at compile time, avoid CPU detection.
|
||||||
SincResamplerCallback* read_cb,
|
// iOS lies about its architecture, so we also need to exclude it here.
|
||||||
int block_size)
|
#if defined(WEBRTC_ARCH_X86_FAMILY) && !defined(WEBRTC_IOS)
|
||||||
: io_sample_rate_ratio_(io_sample_rate_ratio),
|
#if defined(__SSE__)
|
||||||
virtual_source_idx_(0),
|
#define CONVOLVE_FUNC Convolve_SSE
|
||||||
buffer_primed_(false),
|
void SincResampler::InitializeCPUSpecificFeatures() {}
|
||||||
read_cb_(read_cb),
|
#else
|
||||||
block_size_(block_size),
|
// X86 CPU detection required. Function will be set by
|
||||||
buffer_size_(block_size_ + kKernelSize),
|
// InitializeCPUSpecificFeatures().
|
||||||
// Create input buffers with a 16-byte alignment for SSE optimizations.
|
// TODO(dalecurtis): Once Chrome moves to an SSE baseline this can be removed.
|
||||||
kernel_storage_(static_cast<float*>(
|
#define CONVOLVE_FUNC convolve_proc_
|
||||||
AlignedMalloc(sizeof(float) * kKernelStorageSize, 16))),
|
|
||||||
kernel_pre_sinc_storage_(static_cast<float*>(
|
void SincResampler::InitializeCPUSpecificFeatures() {
|
||||||
AlignedMalloc(sizeof(float) * kKernelStorageSize, 16))),
|
convolve_proc_ = WebRtc_GetCPUInfo(kSSE2) ? Convolve_SSE : Convolve_C;
|
||||||
kernel_window_storage_(static_cast<float*>(
|
|
||||||
AlignedMalloc(sizeof(float) * kKernelStorageSize, 16))),
|
|
||||||
input_buffer_(static_cast<float*>(
|
|
||||||
AlignedMalloc(sizeof(float) * buffer_size_, 16))),
|
|
||||||
#if defined(WEBRTC_ARCH_X86_FAMILY) && !defined(__SSE__)
|
|
||||||
convolve_proc_(WebRtc_GetCPUInfo(kSSE2) ? Convolve_SSE : Convolve_C),
|
|
||||||
#elif defined(WEBRTC_ARCH_ARM_V7) && !defined(WEBRTC_ARCH_ARM_NEON)
|
|
||||||
convolve_proc_(WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON ?
|
|
||||||
Convolve_NEON : Convolve_C),
|
|
||||||
#endif
|
|
||||||
// Setup various region pointers in the buffer (see diagram above).
|
|
||||||
r0_(input_buffer_.get() + kKernelSize / 2),
|
|
||||||
r1_(input_buffer_.get()),
|
|
||||||
r2_(r0_),
|
|
||||||
r3_(r0_ + block_size_ - kKernelSize / 2),
|
|
||||||
r4_(r0_ + block_size_),
|
|
||||||
r5_(r0_ + kKernelSize / 2) {
|
|
||||||
Initialize();
|
|
||||||
InitializeKernel();
|
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
#elif defined(WEBRTC_ARCH_ARM_V7)
|
||||||
|
#if defined(WEBRTC_ARCH_ARM_NEON)
|
||||||
|
#define CONVOLVE_FUNC Convolve_NEON
|
||||||
|
void SincResampler::InitializeCPUSpecificFeatures() {}
|
||||||
|
#else
|
||||||
|
// NEON CPU detection required. Function will be set by
|
||||||
|
// InitializeCPUSpecificFeatures().
|
||||||
|
#define CONVOLVE_FUNC convolve_proc_
|
||||||
|
|
||||||
|
void SincResampler::InitializeCPUSpecificFeatures() {
|
||||||
|
convolve_proc_ = WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON ?
|
||||||
|
Convolve_NEON : Convolve_C;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
// Unknown architecture.
|
||||||
|
#define CONVOLVE_FUNC Convolve_C
|
||||||
|
void SincResampler::InitializeCPUSpecificFeatures() {}
|
||||||
|
#endif
|
||||||
|
|
||||||
SincResampler::SincResampler(double io_sample_rate_ratio,
|
SincResampler::SincResampler(double io_sample_rate_ratio,
|
||||||
|
int request_frames,
|
||||||
SincResamplerCallback* read_cb)
|
SincResamplerCallback* read_cb)
|
||||||
: io_sample_rate_ratio_(io_sample_rate_ratio),
|
: io_sample_rate_ratio_(io_sample_rate_ratio),
|
||||||
virtual_source_idx_(0),
|
|
||||||
buffer_primed_(false),
|
|
||||||
read_cb_(read_cb),
|
read_cb_(read_cb),
|
||||||
block_size_(kDefaultBlockSize),
|
request_frames_(request_frames),
|
||||||
buffer_size_(kDefaultBufferSize),
|
input_buffer_size_(request_frames_ + kKernelSize),
|
||||||
// Create input buffers with a 16-byte alignment for SSE optimizations.
|
// Create input buffers with a 16-byte alignment for SSE optimizations.
|
||||||
kernel_storage_(static_cast<float*>(
|
kernel_storage_(static_cast<float*>(
|
||||||
AlignedMalloc(sizeof(float) * kKernelStorageSize, 16))),
|
AlignedMalloc(sizeof(float) * kKernelStorageSize, 16))),
|
||||||
@@ -121,45 +163,19 @@ SincResampler::SincResampler(double io_sample_rate_ratio,
|
|||||||
kernel_window_storage_(static_cast<float*>(
|
kernel_window_storage_(static_cast<float*>(
|
||||||
AlignedMalloc(sizeof(float) * kKernelStorageSize, 16))),
|
AlignedMalloc(sizeof(float) * kKernelStorageSize, 16))),
|
||||||
input_buffer_(static_cast<float*>(
|
input_buffer_(static_cast<float*>(
|
||||||
AlignedMalloc(sizeof(float) * buffer_size_, 16))),
|
AlignedMalloc(sizeof(float) * input_buffer_size_, 16))),
|
||||||
#if defined(WEBRTC_ARCH_X86_FAMILY) && !defined(__SSE__)
|
#if defined(WEBRTC_RESAMPLER_CPU_DETECTION)
|
||||||
convolve_proc_(WebRtc_GetCPUInfo(kSSE2) ? Convolve_SSE : Convolve_C),
|
convolve_proc_(NULL),
|
||||||
#elif defined(WEBRTC_ARCH_ARM_V7) && !defined(WEBRTC_ARCH_ARM_NEON)
|
|
||||||
convolve_proc_(WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON ?
|
|
||||||
Convolve_NEON : Convolve_C),
|
|
||||||
#endif
|
#endif
|
||||||
// Setup various region pointers in the buffer (see diagram above).
|
|
||||||
r0_(input_buffer_.get() + kKernelSize / 2),
|
|
||||||
r1_(input_buffer_.get()),
|
r1_(input_buffer_.get()),
|
||||||
r2_(r0_),
|
r2_(input_buffer_.get() + kKernelSize / 2) {
|
||||||
r3_(r0_ + block_size_ - kKernelSize / 2),
|
#if defined(WEBRTC_RESAMPLER_CPU_DETECTION)
|
||||||
r4_(r0_ + block_size_),
|
InitializeCPUSpecificFeatures();
|
||||||
r5_(r0_ + kKernelSize / 2) {
|
assert(convolve_proc_);
|
||||||
Initialize();
|
#endif
|
||||||
InitializeKernel();
|
assert(request_frames_ > 0);
|
||||||
}
|
Flush();
|
||||||
|
|
||||||
SincResampler::~SincResampler() {}
|
|
||||||
|
|
||||||
void SincResampler::Initialize() {
|
|
||||||
// Ensure kKernelSize is a multiple of 32 for easy SSE optimizations; causes
|
|
||||||
// r0_ and r5_ (used for input) to always be 16-byte aligned by virtue of
|
|
||||||
// input_buffer_ being 16-byte aligned.
|
|
||||||
COMPILE_ASSERT(kKernelSize % 32 == 0);
|
|
||||||
assert(block_size_ > kKernelSize);
|
assert(block_size_ > kKernelSize);
|
||||||
// Basic sanity checks to ensure buffer regions are laid out correctly:
|
|
||||||
// r0_ and r2_ should always be the same position.
|
|
||||||
assert(r0_ == r2_);
|
|
||||||
// r1_ at the beginning of the buffer.
|
|
||||||
assert(r1_ == input_buffer_.get());
|
|
||||||
// r1_ left of r2_, r2_ left of r5_ and r1_, r2_ size correct.
|
|
||||||
assert(r2_ - r1_ == r5_ - r2_);
|
|
||||||
// r3_ left of r4_, r5_ left of r0_ and r3_ size correct.
|
|
||||||
assert(r4_ - r3_ == r5_ - r0_);
|
|
||||||
// r3_, r4_ size correct and r4_ at the end of the buffer.
|
|
||||||
assert(r4_ + (r4_ - r3_) == r1_ + buffer_size_);
|
|
||||||
// r5_ size correct and at the end of the buffer.
|
|
||||||
assert(r5_ + block_size_ == r1_ + buffer_size_);
|
|
||||||
|
|
||||||
memset(kernel_storage_.get(), 0,
|
memset(kernel_storage_.get(), 0,
|
||||||
sizeof(*kernel_storage_.get()) * kKernelStorageSize);
|
sizeof(*kernel_storage_.get()) * kKernelStorageSize);
|
||||||
@@ -167,7 +183,26 @@ void SincResampler::Initialize() {
|
|||||||
sizeof(*kernel_pre_sinc_storage_.get()) * kKernelStorageSize);
|
sizeof(*kernel_pre_sinc_storage_.get()) * kKernelStorageSize);
|
||||||
memset(kernel_window_storage_.get(), 0,
|
memset(kernel_window_storage_.get(), 0,
|
||||||
sizeof(*kernel_window_storage_.get()) * kKernelStorageSize);
|
sizeof(*kernel_window_storage_.get()) * kKernelStorageSize);
|
||||||
memset(input_buffer_.get(), 0, sizeof(*input_buffer_.get()) * buffer_size_);
|
|
||||||
|
InitializeKernel();
|
||||||
|
}
|
||||||
|
|
||||||
|
SincResampler::~SincResampler() {}
|
||||||
|
|
||||||
|
void SincResampler::UpdateRegions(bool second_load) {
|
||||||
|
// Setup various region pointers in the buffer (see diagram above). If we're
|
||||||
|
// on the second load we need to slide r0_ to the right by kKernelSize / 2.
|
||||||
|
r0_ = input_buffer_.get() + (second_load ? kKernelSize : kKernelSize / 2);
|
||||||
|
r3_ = r0_ + request_frames_ - kKernelSize;
|
||||||
|
r4_ = r0_ + request_frames_ - kKernelSize / 2;
|
||||||
|
block_size_ = r4_ - r2_;
|
||||||
|
|
||||||
|
// r1_ at the beginning of the buffer.
|
||||||
|
assert(r1_ == input_buffer_.get());
|
||||||
|
// r1_ left of r2_, r3_ left of r4_ and size correct.
|
||||||
|
assert(r2_ - r1_ == r4_ - r3_);
|
||||||
|
// r2_ left of r3_.
|
||||||
|
assert(r2_ < r3_);
|
||||||
}
|
}
|
||||||
|
|
||||||
void SincResampler::InitializeKernel() {
|
void SincResampler::InitializeKernel() {
|
||||||
@@ -234,67 +269,59 @@ void SincResampler::SetRatio(double io_sample_rate_ratio) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we know the minimum architecture avoid function hopping for CPU detection.
|
void SincResampler::Resample(int frames, float* destination) {
|
||||||
#if defined(WEBRTC_ARCH_X86_FAMILY)
|
|
||||||
#if defined(__SSE__)
|
|
||||||
#define CONVOLVE_FUNC Convolve_SSE
|
|
||||||
#else
|
|
||||||
// X86 CPU detection required. |convolve_proc_| will be set upon construction.
|
|
||||||
// TODO(dalecurtis): Once Chrome moves to a SSE baseline this can be removed.
|
|
||||||
#define CONVOLVE_FUNC convolve_proc_
|
|
||||||
#endif
|
|
||||||
#elif defined(WEBRTC_ARCH_ARM_V7)
|
|
||||||
#if defined(WEBRTC_ARCH_ARM_NEON)
|
|
||||||
#define CONVOLVE_FUNC Convolve_NEON
|
|
||||||
#else
|
|
||||||
// NEON CPU detection required. |convolve_proc_| will be set upon construction.
|
|
||||||
#define CONVOLVE_FUNC convolve_proc_
|
|
||||||
#endif
|
|
||||||
#else
|
|
||||||
// Unknown architecture.
|
|
||||||
#define CONVOLVE_FUNC Convolve_C
|
|
||||||
#endif
|
|
||||||
|
|
||||||
void SincResampler::Resample(float* destination, int frames) {
|
|
||||||
int remaining_frames = frames;
|
int remaining_frames = frames;
|
||||||
|
|
||||||
// Step (1) -- Prime the input buffer at the start of the input stream.
|
// Step (1) -- Prime the input buffer at the start of the input stream.
|
||||||
if (!buffer_primed_) {
|
if (!buffer_primed_ && remaining_frames) {
|
||||||
read_cb_->Run(r0_, block_size_ + kKernelSize / 2);
|
read_cb_->Run(request_frames_, r0_);
|
||||||
buffer_primed_ = true;
|
buffer_primed_ = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Step (2) -- Resample!
|
// Step (2) -- Resample! const what we can outside of the loop for speed. It
|
||||||
|
// actually has an impact on ARM performance. See inner loop comment below.
|
||||||
|
const double current_io_ratio = io_sample_rate_ratio_;
|
||||||
|
const float* const kernel_ptr = kernel_storage_.get();
|
||||||
while (remaining_frames) {
|
while (remaining_frames) {
|
||||||
while (virtual_source_idx_ < block_size_) {
|
// |i| may be negative if the last Resample() call ended on an iteration
|
||||||
|
// that put |virtual_source_idx_| over the limit.
|
||||||
|
//
|
||||||
|
// Note: The loop construct here can severely impact performance on ARM
|
||||||
|
// or when built with clang. See https://codereview.chromium.org/18566009/
|
||||||
|
for (int i = ceil((block_size_ - virtual_source_idx_) / current_io_ratio);
|
||||||
|
i > 0; --i) {
|
||||||
|
assert(virtual_source_idx_ < block_size_);
|
||||||
|
|
||||||
// |virtual_source_idx_| lies in between two kernel offsets so figure out
|
// |virtual_source_idx_| lies in between two kernel offsets so figure out
|
||||||
// what they are.
|
// what they are.
|
||||||
int source_idx = static_cast<int>(virtual_source_idx_);
|
const int source_idx = virtual_source_idx_;
|
||||||
double subsample_remainder = virtual_source_idx_ - source_idx;
|
const double subsample_remainder = virtual_source_idx_ - source_idx;
|
||||||
|
|
||||||
double virtual_offset_idx = subsample_remainder * kKernelOffsetCount;
|
const double virtual_offset_idx =
|
||||||
int offset_idx = static_cast<int>(virtual_offset_idx);
|
subsample_remainder * kKernelOffsetCount;
|
||||||
|
const int offset_idx = virtual_offset_idx;
|
||||||
|
|
||||||
// We'll compute "convolutions" for the two kernels which straddle
|
// We'll compute "convolutions" for the two kernels which straddle
|
||||||
// |virtual_source_idx_|.
|
// |virtual_source_idx_|.
|
||||||
float* k1 = kernel_storage_.get() + offset_idx * kKernelSize;
|
const float* const k1 = kernel_ptr + offset_idx * kKernelSize;
|
||||||
float* k2 = k1 + kKernelSize;
|
const float* const k2 = k1 + kKernelSize;
|
||||||
|
|
||||||
// Ensure |k1|, |k2| are 16-byte aligned for SIMD usage. Should always be
|
// Ensure |k1|, |k2| are 16-byte aligned for SIMD usage. Should always be
|
||||||
// true so long as kKernelSize is a multiple of 16.
|
// true so long as kKernelSize is a multiple of 16.
|
||||||
assert((reinterpret_cast<uintptr_t>(k1) & 0x0F) == 0u);
|
assert(0u == (reinterpret_cast<uintptr_t>(k1) & 0x0F));
|
||||||
assert((reinterpret_cast<uintptr_t>(k2) & 0x0F) == 0u);
|
assert(0u == (reinterpret_cast<uintptr_t>(k2) & 0x0F));
|
||||||
|
|
||||||
// Initialize input pointer based on quantized |virtual_source_idx_|.
|
// Initialize input pointer based on quantized |virtual_source_idx_|.
|
||||||
float* input_ptr = r1_ + source_idx;
|
const float* const input_ptr = r1_ + source_idx;
|
||||||
|
|
||||||
// Figure out how much to weight each kernel's "convolution".
|
// Figure out how much to weight each kernel's "convolution".
|
||||||
double kernel_interpolation_factor = virtual_offset_idx - offset_idx;
|
const double kernel_interpolation_factor =
|
||||||
|
virtual_offset_idx - offset_idx;
|
||||||
*destination++ = CONVOLVE_FUNC(
|
*destination++ = CONVOLVE_FUNC(
|
||||||
input_ptr, k1, k2, kernel_interpolation_factor);
|
input_ptr, k1, k2, kernel_interpolation_factor);
|
||||||
|
|
||||||
// Advance the virtual index.
|
// Advance the virtual index.
|
||||||
virtual_source_idx_ += io_sample_rate_ratio_;
|
virtual_source_idx_ += current_io_ratio;
|
||||||
|
|
||||||
if (!--remaining_frames)
|
if (!--remaining_frames)
|
||||||
return;
|
return;
|
||||||
@@ -303,31 +330,31 @@ void SincResampler::Resample(float* destination, int frames) {
|
|||||||
// Wrap back around to the start.
|
// Wrap back around to the start.
|
||||||
virtual_source_idx_ -= block_size_;
|
virtual_source_idx_ -= block_size_;
|
||||||
|
|
||||||
// Step (3) Copy r3_ to r1_ and r4_ to r2_.
|
// Step (3) -- Copy r3_, r4_ to r1_, r2_.
|
||||||
// This wraps the last input frames back to the start of the buffer.
|
// This wraps the last input frames back to the start of the buffer.
|
||||||
memcpy(r1_, r3_, sizeof(*input_buffer_.get()) * (kKernelSize / 2));
|
memcpy(r1_, r3_, sizeof(*input_buffer_.get()) * kKernelSize);
|
||||||
memcpy(r2_, r4_, sizeof(*input_buffer_.get()) * (kKernelSize / 2));
|
|
||||||
|
|
||||||
// Step (4)
|
// Step (4) -- Reinitialize regions if necessary.
|
||||||
// Refresh the buffer with more input.
|
if (r0_ == r2_)
|
||||||
read_cb_->Run(r5_, block_size_);
|
UpdateRegions(true);
|
||||||
|
|
||||||
|
// Step (5) -- Refresh the buffer with more input.
|
||||||
|
read_cb_->Run(request_frames_, r0_);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#undef CONVOLVE_FUNC
|
#undef CONVOLVE_FUNC
|
||||||
|
|
||||||
int SincResampler::ChunkSize() {
|
int SincResampler::ChunkSize() const {
|
||||||
return block_size_ / io_sample_rate_ratio_;
|
return block_size_ / io_sample_rate_ratio_;
|
||||||
}
|
}
|
||||||
|
|
||||||
int SincResampler::BlockSize() {
|
|
||||||
return block_size_;
|
|
||||||
}
|
|
||||||
|
|
||||||
void SincResampler::Flush() {
|
void SincResampler::Flush() {
|
||||||
virtual_source_idx_ = 0;
|
virtual_source_idx_ = 0;
|
||||||
buffer_primed_ = false;
|
buffer_primed_ = false;
|
||||||
memset(input_buffer_.get(), 0, sizeof(*input_buffer_.get()) * buffer_size_);
|
memset(input_buffer_.get(), 0,
|
||||||
|
sizeof(*input_buffer_.get()) * input_buffer_size_);
|
||||||
|
UpdateRegions(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
float SincResampler::Convolve_C(const float* input_ptr, const float* k1,
|
float SincResampler::Convolve_C(const float* input_ptr, const float* k1,
|
||||||
|
@@ -20,6 +20,13 @@
|
|||||||
#include "webrtc/test/testsupport/gtest_prod_util.h"
|
#include "webrtc/test/testsupport/gtest_prod_util.h"
|
||||||
#include "webrtc/typedefs.h"
|
#include "webrtc/typedefs.h"
|
||||||
|
|
||||||
|
#if (defined(WEBRTC_ARCH_X86_FAMILY) && !defined(WEBRTC_IOS) && \
|
||||||
|
!defined(__SSE__)) || \
|
||||||
|
(defined(WEBRTC_ARCH_ARM_V7) && !defined(WEBRTC_ARCH_ARM_NEON))
|
||||||
|
// Convenience define.
|
||||||
|
#define WEBRTC_RESAMPLER_CPU_DETECTION
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace webrtc {
|
namespace webrtc {
|
||||||
|
|
||||||
// Callback class for providing more data into the resampler. Expects |frames|
|
// Callback class for providing more data into the resampler. Expects |frames|
|
||||||
@@ -28,7 +35,7 @@ namespace webrtc {
|
|||||||
class SincResamplerCallback {
|
class SincResamplerCallback {
|
||||||
public:
|
public:
|
||||||
virtual ~SincResamplerCallback() {}
|
virtual ~SincResamplerCallback() {}
|
||||||
virtual void Run(float* destination, int frames) = 0;
|
virtual void Run(int frames, float* destination) = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
// SincResampler is a high-quality single-channel sample-rate converter.
|
// SincResampler is a high-quality single-channel sample-rate converter.
|
||||||
@@ -40,43 +47,36 @@ class SincResampler {
|
|||||||
// TODO(dalecurtis): Test performance to see if we can jack this up to 64+.
|
// TODO(dalecurtis): Test performance to see if we can jack this up to 64+.
|
||||||
kKernelSize = 32,
|
kKernelSize = 32,
|
||||||
|
|
||||||
// The number of destination frames generated per processing pass. Affects
|
// Default request size. Affects how often and for how much SincResampler
|
||||||
// how often and for how much SincResampler calls back for input. Must be
|
// calls back for input. Must be greater than kKernelSize.
|
||||||
// greater than kKernelSize.
|
kDefaultRequestSize = 512,
|
||||||
kDefaultBlockSize = 512,
|
|
||||||
|
|
||||||
// The kernel offset count is used for interpolation and is the number of
|
// The kernel offset count is used for interpolation and is the number of
|
||||||
// sub-sample kernel shifts. Can be adjusted for quality (higher is better)
|
// sub-sample kernel shifts. Can be adjusted for quality (higher is better)
|
||||||
// at the expense of allocating more memory.
|
// at the expense of allocating more memory.
|
||||||
kKernelOffsetCount = 32,
|
kKernelOffsetCount = 32,
|
||||||
kKernelStorageSize = kKernelSize * (kKernelOffsetCount + 1),
|
kKernelStorageSize = kKernelSize * (kKernelOffsetCount + 1),
|
||||||
|
|
||||||
// The size (in samples) of the internal buffer used by the resampler.
|
|
||||||
kDefaultBufferSize = kDefaultBlockSize + kKernelSize,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// Constructs a SincResampler with the specified |read_cb|, which is used to
|
// Constructs a SincResampler with the specified |read_cb|, which is used to
|
||||||
// acquire audio data for resampling. |io_sample_rate_ratio| is the ratio of
|
// acquire audio data for resampling. |io_sample_rate_ratio| is the ratio
|
||||||
// input / output sample rates. If desired, the number of destination frames
|
// of input / output sample rates. |request_frames| controls the size in
|
||||||
// generated per processing pass can be specified through |block_size|.
|
// frames of the buffer requested by each |read_cb| call. The value must be
|
||||||
|
// greater than kKernelSize. Specify kDefaultRequestSize if there are no
|
||||||
|
// request size constraints.
|
||||||
SincResampler(double io_sample_rate_ratio,
|
SincResampler(double io_sample_rate_ratio,
|
||||||
|
int request_frames,
|
||||||
SincResamplerCallback* read_cb);
|
SincResamplerCallback* read_cb);
|
||||||
SincResampler(double io_sample_rate_ratio,
|
|
||||||
SincResamplerCallback* read_cb,
|
|
||||||
int block_size);
|
|
||||||
virtual ~SincResampler();
|
virtual ~SincResampler();
|
||||||
|
|
||||||
// Resample |frames| of data from |read_cb_| into |destination|.
|
// Resample |frames| of data from |read_cb_| into |destination|.
|
||||||
void Resample(float* destination, int frames);
|
void Resample(int frames, float* destination);
|
||||||
|
|
||||||
// The maximum size in frames that guarantees Resample() will only make a
|
// The maximum size in frames that guarantees Resample() will only make a
|
||||||
// single call to |read_cb_| for more data.
|
// single call to |read_cb_| for more data.
|
||||||
int ChunkSize();
|
int ChunkSize() const;
|
||||||
|
|
||||||
// The number of source frames requested per processing pass (and equal to
|
int request_frames() const { return request_frames_; }
|
||||||
// |block_size| if provided at construction). The first pass will request
|
|
||||||
// more to prime the buffer.
|
|
||||||
int BlockSize();
|
|
||||||
|
|
||||||
// Flush all buffered data and reset internal indices. Not thread safe, do
|
// Flush all buffered data and reset internal indices. Not thread safe, do
|
||||||
// not call while Resample() is in progress.
|
// not call while Resample() is in progress.
|
||||||
@@ -86,8 +86,8 @@ class SincResampler {
|
|||||||
// the kernels used for resampling. Not thread safe, do not call while
|
// the kernels used for resampling. Not thread safe, do not call while
|
||||||
// Resample() is in progress.
|
// Resample() is in progress.
|
||||||
//
|
//
|
||||||
// TODO(ajm): use this in PushSincResampler rather than reconstructing
|
// TODO(ajm): Use this in PushSincResampler rather than reconstructing
|
||||||
// SincResampler.
|
// SincResampler. We would also need a way to update |request_frames_|.
|
||||||
void SetRatio(double io_sample_rate_ratio);
|
void SetRatio(double io_sample_rate_ratio);
|
||||||
|
|
||||||
float* get_kernel_for_testing() { return kernel_storage_.get(); }
|
float* get_kernel_for_testing() { return kernel_storage_.get(); }
|
||||||
@@ -96,8 +96,14 @@ class SincResampler {
|
|||||||
FRIEND_TEST_ALL_PREFIXES(SincResamplerTest, Convolve);
|
FRIEND_TEST_ALL_PREFIXES(SincResamplerTest, Convolve);
|
||||||
FRIEND_TEST_ALL_PREFIXES(SincResamplerTest, ConvolveBenchmark);
|
FRIEND_TEST_ALL_PREFIXES(SincResamplerTest, ConvolveBenchmark);
|
||||||
|
|
||||||
void Initialize();
|
|
||||||
void InitializeKernel();
|
void InitializeKernel();
|
||||||
|
void UpdateRegions(bool second_load);
|
||||||
|
|
||||||
|
// Selects runtime specific CPU features like SSE. Must be called before
|
||||||
|
// using SincResampler.
|
||||||
|
// TODO(ajm): Currently managed by the class internally. See the note with
|
||||||
|
// |convolve_proc_| below.
|
||||||
|
void InitializeCPUSpecificFeatures();
|
||||||
|
|
||||||
// Compute convolution of |k1| and |k2| over |input_ptr|, resultant sums are
|
// Compute convolution of |k1| and |k2| over |input_ptr|, resultant sums are
|
||||||
// linearly interpolated using |kernel_interpolation_factor|. On x86, the
|
// linearly interpolated using |kernel_interpolation_factor|. On x86, the
|
||||||
@@ -128,11 +134,14 @@ class SincResampler {
|
|||||||
// Source of data for resampling.
|
// Source of data for resampling.
|
||||||
SincResamplerCallback* read_cb_;
|
SincResamplerCallback* read_cb_;
|
||||||
|
|
||||||
// See kDefaultBlockSize.
|
// The size (in samples) to request from each |read_cb_| execution.
|
||||||
|
const int request_frames_;
|
||||||
|
|
||||||
|
// The number of source frames processed per pass.
|
||||||
int block_size_;
|
int block_size_;
|
||||||
|
|
||||||
// See kDefaultBufferSize.
|
// The size (in samples) of the internal buffer used by the resampler.
|
||||||
int buffer_size_;
|
const int input_buffer_size_;
|
||||||
|
|
||||||
// Contains kKernelOffsetCount kernels back-to-back, each of size kKernelSize.
|
// Contains kKernelOffsetCount kernels back-to-back, each of size kKernelSize.
|
||||||
// The kernel offsets are sub-sample shifts of a windowed sinc shifted from
|
// The kernel offsets are sub-sample shifts of a windowed sinc shifted from
|
||||||
@@ -145,21 +154,22 @@ class SincResampler {
|
|||||||
scoped_ptr_malloc<float, AlignedFree> input_buffer_;
|
scoped_ptr_malloc<float, AlignedFree> input_buffer_;
|
||||||
|
|
||||||
// Stores the runtime selection of which Convolve function to use.
|
// Stores the runtime selection of which Convolve function to use.
|
||||||
#if (defined(WEBRTC_ARCH_X86_FAMILY) && !defined(__SSE__)) || \
|
// TODO(ajm): Move to using a global static which must only be initialized
|
||||||
(defined(WEBRTC_ARCH_ARM_V7) && !defined(WEBRTC_ARCH_ARM_NEON))
|
// once by the user. We're not doing this initially, because we don't have
|
||||||
|
// e.g. a LazyInstance helper in webrtc.
|
||||||
|
#if defined(WEBRTC_RESAMPLER_CPU_DETECTION)
|
||||||
typedef float (*ConvolveProc)(const float*, const float*, const float*,
|
typedef float (*ConvolveProc)(const float*, const float*, const float*,
|
||||||
double);
|
double);
|
||||||
const ConvolveProc convolve_proc_;
|
ConvolveProc convolve_proc_;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Pointers to the various regions inside |input_buffer_|. See the diagram at
|
// Pointers to the various regions inside |input_buffer_|. See the diagram at
|
||||||
// the top of the .cc file for more information.
|
// the top of the .cc file for more information.
|
||||||
float* const r0_;
|
float* r0_;
|
||||||
float* const r1_;
|
float* const r1_;
|
||||||
float* const r2_;
|
float* const r2_;
|
||||||
float* const r3_;
|
float* r3_;
|
||||||
float* const r4_;
|
float* r4_;
|
||||||
float* const r5_;
|
|
||||||
|
|
||||||
DISALLOW_COPY_AND_ASSIGN(SincResampler);
|
DISALLOW_COPY_AND_ASSIGN(SincResampler);
|
||||||
};
|
};
|
||||||
|
@@ -36,18 +36,18 @@ static const double kKernelInterpolationFactor = 0.5;
|
|||||||
// Helper class to ensure ChunkedResample() functions properly.
|
// Helper class to ensure ChunkedResample() functions properly.
|
||||||
class MockSource : public SincResamplerCallback {
|
class MockSource : public SincResamplerCallback {
|
||||||
public:
|
public:
|
||||||
MOCK_METHOD2(Run, void(float* destination, int frames));
|
MOCK_METHOD2(Run, void(int frames, float* destination));
|
||||||
};
|
};
|
||||||
|
|
||||||
ACTION(ClearBuffer) {
|
ACTION(ClearBuffer) {
|
||||||
memset(arg0, 0, arg1 * sizeof(float));
|
memset(arg1, 0, arg0 * sizeof(float));
|
||||||
}
|
}
|
||||||
|
|
||||||
ACTION(FillBuffer) {
|
ACTION(FillBuffer) {
|
||||||
// Value chosen arbitrarily such that SincResampler resamples it to something
|
// Value chosen arbitrarily such that SincResampler resamples it to something
|
||||||
// easily representable on all platforms; e.g., using kSampleRateRatio this
|
// easily representable on all platforms; e.g., using kSampleRateRatio this
|
||||||
// becomes 1.81219.
|
// becomes 1.81219.
|
||||||
memset(arg0, 64, arg1 * sizeof(float));
|
memset(arg1, 64, arg0 * sizeof(float));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test requesting multiples of ChunkSize() frames results in the proper number
|
// Test requesting multiples of ChunkSize() frames results in the proper number
|
||||||
@@ -57,7 +57,8 @@ TEST(SincResamplerTest, ChunkedResample) {
|
|||||||
|
|
||||||
// Choose a high ratio of input to output samples which will result in quick
|
// Choose a high ratio of input to output samples which will result in quick
|
||||||
// exhaustion of SincResampler's internal buffers.
|
// exhaustion of SincResampler's internal buffers.
|
||||||
SincResampler resampler(kSampleRateRatio, &mock_source);
|
SincResampler resampler(kSampleRateRatio, SincResampler::kDefaultRequestSize,
|
||||||
|
&mock_source);
|
||||||
|
|
||||||
static const int kChunks = 2;
|
static const int kChunks = 2;
|
||||||
int max_chunk_size = resampler.ChunkSize() * kChunks;
|
int max_chunk_size = resampler.ChunkSize() * kChunks;
|
||||||
@@ -66,25 +67,26 @@ TEST(SincResamplerTest, ChunkedResample) {
|
|||||||
// Verify requesting ChunkSize() frames causes a single callback.
|
// Verify requesting ChunkSize() frames causes a single callback.
|
||||||
EXPECT_CALL(mock_source, Run(_, _))
|
EXPECT_CALL(mock_source, Run(_, _))
|
||||||
.Times(1).WillOnce(ClearBuffer());
|
.Times(1).WillOnce(ClearBuffer());
|
||||||
resampler.Resample(resampled_destination.get(), resampler.ChunkSize());
|
resampler.Resample(resampler.ChunkSize(), resampled_destination.get());
|
||||||
|
|
||||||
// Verify requesting kChunks * ChunkSize() frames causes kChunks callbacks.
|
// Verify requesting kChunks * ChunkSize() frames causes kChunks callbacks.
|
||||||
testing::Mock::VerifyAndClear(&mock_source);
|
testing::Mock::VerifyAndClear(&mock_source);
|
||||||
EXPECT_CALL(mock_source, Run(_, _))
|
EXPECT_CALL(mock_source, Run(_, _))
|
||||||
.Times(kChunks).WillRepeatedly(ClearBuffer());
|
.Times(kChunks).WillRepeatedly(ClearBuffer());
|
||||||
resampler.Resample(resampled_destination.get(), max_chunk_size);
|
resampler.Resample(max_chunk_size, resampled_destination.get());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test flush resets the internal state properly.
|
// Test flush resets the internal state properly.
|
||||||
TEST(SincResamplerTest, Flush) {
|
TEST(SincResamplerTest, Flush) {
|
||||||
MockSource mock_source;
|
MockSource mock_source;
|
||||||
SincResampler resampler(kSampleRateRatio, &mock_source);
|
SincResampler resampler(kSampleRateRatio, SincResampler::kDefaultRequestSize,
|
||||||
|
&mock_source);
|
||||||
scoped_array<float> resampled_destination(new float[resampler.ChunkSize()]);
|
scoped_array<float> resampled_destination(new float[resampler.ChunkSize()]);
|
||||||
|
|
||||||
// Fill the resampler with junk data.
|
// Fill the resampler with junk data.
|
||||||
EXPECT_CALL(mock_source, Run(_, _))
|
EXPECT_CALL(mock_source, Run(_, _))
|
||||||
.Times(1).WillOnce(FillBuffer());
|
.Times(1).WillOnce(FillBuffer());
|
||||||
resampler.Resample(resampled_destination.get(), resampler.ChunkSize() / 2);
|
resampler.Resample(resampler.ChunkSize() / 2, resampled_destination.get());
|
||||||
ASSERT_NE(resampled_destination[0], 0);
|
ASSERT_NE(resampled_destination[0], 0);
|
||||||
|
|
||||||
// Flush and request more data, which should all be zeros now.
|
// Flush and request more data, which should all be zeros now.
|
||||||
@@ -92,11 +94,25 @@ TEST(SincResamplerTest, Flush) {
|
|||||||
testing::Mock::VerifyAndClear(&mock_source);
|
testing::Mock::VerifyAndClear(&mock_source);
|
||||||
EXPECT_CALL(mock_source, Run(_, _))
|
EXPECT_CALL(mock_source, Run(_, _))
|
||||||
.Times(1).WillOnce(ClearBuffer());
|
.Times(1).WillOnce(ClearBuffer());
|
||||||
resampler.Resample(resampled_destination.get(), resampler.ChunkSize() / 2);
|
resampler.Resample(resampler.ChunkSize() / 2, resampled_destination.get());
|
||||||
for (int i = 0; i < resampler.ChunkSize() / 2; ++i)
|
for (int i = 0; i < resampler.ChunkSize() / 2; ++i)
|
||||||
ASSERT_FLOAT_EQ(resampled_destination[i], 0);
|
ASSERT_FLOAT_EQ(resampled_destination[i], 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Benchmark the cost of repeated SetRatio() calls (disabled by default).
|
||||||
|
TEST(SincResamplerTest, DISABLED_SetRatioBench) {
|
||||||
|
MockSource mock_source;
|
||||||
|
SincResampler resampler(kSampleRateRatio, SincResampler::kDefaultRequestSize,
|
||||||
|
&mock_source);
|
||||||
|
|
||||||
|
TickTime start = TickTime::Now();
|
||||||
|
for (int i = 1; i < 10000; ++i)
|
||||||
|
resampler.SetRatio(1.0 / i);
|
||||||
|
double total_time_c_us = (TickTime::Now() - start).Microseconds();
|
||||||
|
printf("SetRatio() took %.2fms.\n", total_time_c_us / 1000);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// Define platform independent function name for Convolve* tests.
|
// Define platform independent function name for Convolve* tests.
|
||||||
#if defined(WEBRTC_ARCH_X86_FAMILY)
|
#if defined(WEBRTC_ARCH_X86_FAMILY)
|
||||||
#define CONVOLVE_FUNC Convolve_SSE
|
#define CONVOLVE_FUNC Convolve_SSE
|
||||||
@@ -117,7 +133,8 @@ TEST(SincResamplerTest, Convolve) {
|
|||||||
|
|
||||||
// Initialize a dummy resampler.
|
// Initialize a dummy resampler.
|
||||||
MockSource mock_source;
|
MockSource mock_source;
|
||||||
SincResampler resampler(kSampleRateRatio, &mock_source);
|
SincResampler resampler(kSampleRateRatio, SincResampler::kDefaultRequestSize,
|
||||||
|
&mock_source);
|
||||||
|
|
||||||
// The optimized Convolve methods are slightly more precise than Convolve_C(),
|
// The optimized Convolve methods are slightly more precise than Convolve_C(),
|
||||||
// so comparison must be done using an epsilon.
|
// so comparison must be done using an epsilon.
|
||||||
@@ -150,7 +167,8 @@ TEST(SincResamplerTest, Convolve) {
|
|||||||
TEST(SincResamplerTest, ConvolveBenchmark) {
|
TEST(SincResamplerTest, ConvolveBenchmark) {
|
||||||
// Initialize a dummy resampler.
|
// Initialize a dummy resampler.
|
||||||
MockSource mock_source;
|
MockSource mock_source;
|
||||||
SincResampler resampler(kSampleRateRatio, &mock_source);
|
SincResampler resampler(kSampleRateRatio, SincResampler::kDefaultRequestSize,
|
||||||
|
&mock_source);
|
||||||
|
|
||||||
// Retrieve benchmark iterations from command line.
|
// Retrieve benchmark iterations from command line.
|
||||||
// TODO(ajm): Reintroduce this as a command line option.
|
// TODO(ajm): Reintroduce this as a command line option.
|
||||||
@@ -243,9 +261,8 @@ TEST_P(SincResamplerTest, Resample) {
|
|||||||
input_rate_, input_samples, input_nyquist_freq, 0);
|
input_rate_, input_samples, input_nyquist_freq, 0);
|
||||||
|
|
||||||
const double io_ratio = input_rate_ / static_cast<double>(output_rate_);
|
const double io_ratio = input_rate_ / static_cast<double>(output_rate_);
|
||||||
SincResampler resampler(
|
SincResampler resampler(io_ratio, SincResampler::kDefaultRequestSize,
|
||||||
io_ratio,
|
&resampler_source);
|
||||||
&resampler_source);
|
|
||||||
|
|
||||||
// Force an update to the sample rate ratio to ensure dynamic sample rate
|
// Force an update to the sample rate ratio to ensure dynamic sample rate
|
||||||
// changes are working correctly.
|
// changes are working correctly.
|
||||||
@@ -265,12 +282,12 @@ TEST_P(SincResamplerTest, Resample) {
|
|||||||
scoped_array<float> pure_destination(new float[output_samples]);
|
scoped_array<float> pure_destination(new float[output_samples]);
|
||||||
|
|
||||||
// Generate resampled signal.
|
// Generate resampled signal.
|
||||||
resampler.Resample(resampled_destination.get(), output_samples);
|
resampler.Resample(output_samples, resampled_destination.get());
|
||||||
|
|
||||||
// Generate pure signal.
|
// Generate pure signal.
|
||||||
SinusoidalLinearChirpSource pure_source(
|
SinusoidalLinearChirpSource pure_source(
|
||||||
output_rate_, output_samples, input_nyquist_freq, 0);
|
output_rate_, output_samples, input_nyquist_freq, 0);
|
||||||
pure_source.Run(pure_destination.get(), output_samples);
|
pure_source.Run(output_samples, pure_destination.get());
|
||||||
|
|
||||||
// Range of the Nyquist frequency (0.5 * min(input rate, output_rate)) which
|
// Range of the Nyquist frequency (0.5 * min(input rate, output_rate)) which
|
||||||
// we refer to as low and high.
|
// we refer to as low and high.
|
||||||
|
@@ -29,7 +29,7 @@ SinusoidalLinearChirpSource::SinusoidalLinearChirpSource(int sample_rate,
|
|||||||
k_ = (max_frequency_ - kMinFrequency) / duration;
|
k_ = (max_frequency_ - kMinFrequency) / duration;
|
||||||
}
|
}
|
||||||
|
|
||||||
void SinusoidalLinearChirpSource::Run(float* destination, int frames) {
|
void SinusoidalLinearChirpSource::Run(int frames, float* destination) {
|
||||||
for (int i = 0; i < frames; ++i, ++current_index_) {
|
for (int i = 0; i < frames; ++i, ++current_index_) {
|
||||||
// Filter out frequencies higher than Nyquist.
|
// Filter out frequencies higher than Nyquist.
|
||||||
if (Frequency(current_index_) > 0.5 * sample_rate_) {
|
if (Frequency(current_index_) > 0.5 * sample_rate_) {
|
||||||
|
@@ -31,7 +31,7 @@ class SinusoidalLinearChirpSource : public SincResamplerCallback {
|
|||||||
|
|
||||||
virtual ~SinusoidalLinearChirpSource() {}
|
virtual ~SinusoidalLinearChirpSource() {}
|
||||||
|
|
||||||
virtual void Run(float* destination, int frames);
|
virtual void Run(int frames, float* destination);
|
||||||
|
|
||||||
double Frequency(int position);
|
double Frequency(int position);
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user