NetEq changes.

BUG=
R=henrik.lundin@webrtc.org, minyue@webrtc.org, tina.legrand@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/9859005

git-svn-id: http://webrtc.googlecode.com/svn/trunk@5889 4adac7df-926f-26a2-2b94-8c16560cd09d
turaj@webrtc.org
2014-04-11 18:47:55 +00:00
parent ffd242432d
commit 8d1cdaa84e
19 changed files with 345 additions and 275 deletions

View File

@@ -302,55 +302,6 @@ TEST_F(AcmReceiverTest, DISABLED_ON_ANDROID(PostdecodingVad)) {
EXPECT_EQ(AudioFrame::kVadUnknown, frame.vad_activity_);
}
TEST_F(AcmReceiverTest, DISABLED_ON_ANDROID(FlushBuffer)) {
const int id = ACMCodecDB::kISAC;
EXPECT_EQ(0, receiver_->AddCodec(id, codecs_[id].pltype, codecs_[id].channels,
NULL));
const int kNumPackets = 5;
const int num_10ms_frames = codecs_[id].pacsize / (codecs_[id].plfreq / 100);
for (int n = 0; n < kNumPackets; ++n)
InsertOnePacketOfSilence(id);
ACMNetworkStatistics statistics;
receiver_->NetworkStatistics(&statistics);
ASSERT_EQ(num_10ms_frames * kNumPackets * 10, statistics.currentBufferSize);
receiver_->FlushBuffers();
receiver_->NetworkStatistics(&statistics);
ASSERT_EQ(0, statistics.currentBufferSize);
}
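
For reference, the expected buffer size above is just packets x frames-per-packet x 10 ms. A worked example, assuming iSAC wideband's usual table entry (pacsize = 480, plfreq = 16000; an assumption, not shown in this diff):

// num_10ms_frames = pacsize / (plfreq / 100) = 480 / 160 = 3
// currentBufferSize = num_10ms_frames * kNumPackets * 10 = 3 * 5 * 10 = 150 ms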
TEST_F(AcmReceiverTest, DISABLED_ON_ANDROID(PlayoutTimestamp)) {
const int id = ACMCodecDB::kPCM16Bwb;
EXPECT_EQ(0, receiver_->AddCodec(id, codecs_[id].pltype, codecs_[id].channels,
NULL));
receiver_->SetPlayoutMode(fax);
const int kNumPackets = 5;
const int num_10ms_frames = codecs_[id].pacsize / (codecs_[id].plfreq / 100);
uint32_t expected_timestamp;
AudioFrame frame;
int ts_offset = 0;
bool first_audio_frame = true;
for (int n = 0; n < kNumPackets; ++n) {
packet_sent_ = false;
InsertOnePacketOfSilence(id);
ASSERT_TRUE(packet_sent_);
expected_timestamp = last_packet_send_timestamp_;
for (int k = 0; k < num_10ms_frames; ++k) {
ASSERT_EQ(0, receiver_->GetAudio(codecs_[id].plfreq, &frame));
if (first_audio_frame) {
// There is an offset in playout timestamps. Perhaps it is related to the
// initial delay that NetEq applies.
ts_offset = receiver_->PlayoutTimestamp() - expected_timestamp;
first_audio_frame = false;
} else {
EXPECT_EQ(expected_timestamp + ts_offset,
receiver_->PlayoutTimestamp());
}
expected_timestamp += codecs_[id].plfreq / 100; // Increment by 10 ms.
}
}
}
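
Each GetAudio() call pulls 10 ms, so the expected timestamp advances by plfreq / 100 samples per call. For the PCM16B wideband codec used here (assuming its 16 kHz table entry):

// Timestamp step per 10 ms frame:
// 16000 / 100 = 160 samples per GetAudio() call.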
TEST_F(AcmReceiverTest, DISABLED_ON_ANDROID(LastAudioCodec)) {
const int kCodecId[] = {
ACMCodecDB::kISAC, ACMCodecDB::kPCMA, ACMCodecDB::kISACSWB,

View File

@@ -41,7 +41,7 @@ class AudioClassifier {
bool Analysis(const int16_t* input, int input_length, int channels);
// Gets the current classification : true = music, false = speech.
bool is_music() const { return is_music_; }
virtual bool is_music() const { return is_music_; }
// Gets the current music probability.
float music_probability() const { return music_probability_; }
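
Making is_music() virtual presumably enables test doubles. A minimal sketch of such a subclass (hypothetical; MockAudioClassifier is not part of this commit, and it assumes AudioClassifier is default-constructible):

// Hypothetical test double built on the now-virtual accessor.
class MockAudioClassifier : public AudioClassifier {
 public:
  MockAudioClassifier() : forced_music_(false) {}
  virtual bool is_music() const { return forced_music_; }
  void set_music(bool music) { forced_music_ = music; }
 private:
  bool forced_music_;
};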

View File

@@ -145,8 +145,8 @@ Operations DecisionLogic::GetDecision(const SyncBuffer& sync_buffer,
reset_decoder);
}
void DecisionLogic::ExpandDecision(bool is_expand_decision) {
if (is_expand_decision) {
void DecisionLogic::ExpandDecision(Operations operation) {
if (operation == kExpand) {
num_consecutive_expands_++;
} else {
num_consecutive_expands_ = 0;

View File

@@ -92,7 +92,7 @@ class DecisionLogic {
// not. Note that this is necessary, since an expand decision can be changed
// to kNormal in NetEqImpl::GetDecision if there is still enough data in the
// sync buffer.
void ExpandDecision(bool is_expand_decision);
virtual void ExpandDecision(Operations operation);
// Adds |value| to |sample_memory_|.
void AddSampleMemory(int32_t value) {
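
With ExpandDecision() virtual and taking the full Operations value, a subclass can observe exactly which operation was chosen. A hypothetical sketch (constructor wiring omitted; the class name is illustrative only):

// Counts expand decisions; everything else defers to the base class.
class LoggingDecisionLogic : public DecisionLogic {
 public:
  virtual void ExpandDecision(Operations operation) {
    if (operation == kExpand)
      ++expand_count_;
    DecisionLogic::ExpandDecision(operation);
  }
  int expand_count_;  // Number of kExpand decisions seen.
};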

View File

@@ -38,6 +38,10 @@ class DecisionLogicNormal : public DecisionLogic {
virtual ~DecisionLogicNormal() {}
protected:
static const int kAllowMergeWithoutExpandMs = 20; // 20 ms.
static const int kReinitAfterExpands = 100;
static const int kMaxWaitForPacket = 10;
// Returns the operation that should be done next. |sync_buffer| and |expand|
// are provided for reference. |decoder_frame_length| is the number of samples
// obtained from the last decoded frame. If there is a packet available, the
@@ -54,32 +58,29 @@ class DecisionLogicNormal : public DecisionLogic {
Modes prev_mode, bool play_dtmf,
bool* reset_decoder);
private:
static const int kAllowMergeWithoutExpandMs = 20; // 20 ms.
static const int kReinitAfterExpands = 100;
static const int kMaxWaitForPacket = 10;
// Returns the operation given that the next available packet is a comfort
// noise payload (RFC 3389 only, not codec-internal).
Operations CngOperation(Modes prev_mode, uint32_t target_timestamp,
uint32_t available_timestamp);
// Returns the operation given that no packets are available (except maybe
// a DTMF event, flagged by setting |play_dtmf| true).
Operations NoPacket(bool play_dtmf);
// Returns the operation to do given that the expected packet is available.
Operations ExpectedPacketAvailable(Modes prev_mode, bool play_dtmf);
// Returns the operation to do given that the expected packet is not
// available, but a packet further into the future is at hand.
Operations FuturePacketAvailable(const SyncBuffer& sync_buffer,
virtual Operations FuturePacketAvailable(
const SyncBuffer& sync_buffer,
const Expand& expand,
int decoder_frame_length, Modes prev_mode,
uint32_t target_timestamp,
uint32_t available_timestamp,
bool play_dtmf);
// Returns the operation to do given that the expected packet is available.
virtual Operations ExpectedPacketAvailable(Modes prev_mode, bool play_dtmf);
// Returns the operation given that no packets are available (except maybe
// a DTMF event, flagged by setting |play_dtmf| true).
virtual Operations NoPacket(bool play_dtmf);
private:
// Returns the operation given that the next available packet is a comfort
// noise payload (RFC 3389 only, not codec-internal).
Operations CngOperation(Modes prev_mode, uint32_t target_timestamp,
uint32_t available_timestamp);
// Checks if enough time has elapsed since the last successful timescale
// operation was done (i.e., accelerate or preemptive expand).
bool TimescaleAllowed() const { return timescale_hold_off_ == 0; }
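
Moving these decision helpers to protected virtual lets a subclass reshape one decision while inheriting the rest. A hypothetical example (constructor omitted):

// Sketch: a variant that never plays DTMF while the buffer is empty.
class NoDtmfDecisionLogic : public DecisionLogicNormal {
 protected:
  virtual Operations NoPacket(bool play_dtmf) {
    return DecisionLogicNormal::NoPacket(false);  // Ignore |play_dtmf|.
  }
};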

View File

@@ -56,20 +56,9 @@ int Expand::Process(AudioMultiVector* output) {
// This is not the first expansion, parameters are already estimated.
// Extract a noise segment.
int16_t rand_length = max_lag_;
// TODO(hlundin): This if-statement should not be needed. Should be just
// as good to generate all of the vector in one call in either case.
if (rand_length <= RandomVector::kRandomTableSize) {
random_vector_->IncreaseSeedIncrement(2);
random_vector_->Generate(rand_length, random_vector);
} else {
// This only applies to SWB where length could be larger than 256.
assert(rand_length <= kMaxSampleRate / 8000 * 120 + 30);
random_vector_->IncreaseSeedIncrement(2);
random_vector_->Generate(RandomVector::kRandomTableSize, random_vector);
random_vector_->IncreaseSeedIncrement(2);
random_vector_->Generate(rand_length - RandomVector::kRandomTableSize,
&random_vector[RandomVector::kRandomTableSize]);
}
GenerateRandomVector(2, rand_length, random_vector);
}
@@ -262,82 +251,12 @@ int Expand::Process(AudioMultiVector* output) {
}
// Background noise part.
// TODO(hlundin): Move to separate method? In BackgroundNoise class?
if (background_noise_->initialized()) {
// Use background noise parameters.
memcpy(noise_vector - kNoiseLpcOrder,
background_noise_->FilterState(channel_ix),
sizeof(int16_t) * kNoiseLpcOrder);
if (background_noise_->ScaleShift(channel_ix) > 1) {
add_constant = 1 << (background_noise_->ScaleShift(channel_ix) - 1);
} else {
add_constant = 0;
}
// Scale random vector to correct energy level.
WebRtcSpl_AffineTransformVector(
scaled_random_vector, random_vector,
background_noise_->Scale(channel_ix), add_constant,
background_noise_->ScaleShift(channel_ix),
static_cast<int>(current_lag));
WebRtcSpl_FilterARFastQ12(scaled_random_vector, noise_vector,
background_noise_->Filter(channel_ix),
kNoiseLpcOrder + 1,
static_cast<int>(current_lag));
background_noise_->SetFilterState(
GenerateBackgroundNoise(random_vector,
channel_ix,
&(noise_vector[current_lag - kNoiseLpcOrder]),
kNoiseLpcOrder);
// Unmute the background noise.
int16_t bgn_mute_factor = background_noise_->MuteFactor(channel_ix);
NetEqBackgroundNoiseMode bgn_mode = background_noise_->mode();
if (bgn_mode == kBgnFade &&
consecutive_expands_ >= kMaxConsecutiveExpands &&
bgn_mute_factor > 0) {
// Fade BGN to zero.
// Calculate muting slope, approximately -2^18 / fs_hz.
int16_t mute_slope;
if (fs_hz_ == 8000) {
mute_slope = -32;
} else if (fs_hz_ == 16000) {
mute_slope = -16;
} else if (fs_hz_ == 32000) {
mute_slope = -8;
} else {
mute_slope = -5;
}
// Use UnmuteSignal function with negative slope.
// |bgn_mute_factor| is in Q14. |mute_slope| is in Q20.
DspHelper::UnmuteSignal(noise_vector, current_lag, &bgn_mute_factor,
mute_slope, noise_vector);
} else if (bgn_mute_factor < 16384) {
// If mode is kBgnOff, or if kBgnFade has started fading,
// Use regular |mute_slope|.
if (!stop_muting_ && bgn_mode != kBgnOff &&
!(bgn_mode == kBgnFade &&
consecutive_expands_ >= kMaxConsecutiveExpands)) {
DspHelper::UnmuteSignal(noise_vector, static_cast<int>(current_lag),
&bgn_mute_factor, parameters.mute_slope,
noise_vector);
} else {
// kBgnOn and stop muting, or
// kBgnOff (mute factor is always 0), or
// kBgnFade has reached 0.
WebRtcSpl_AffineTransformVector(noise_vector, noise_vector,
bgn_mute_factor, 8192, 14,
static_cast<int>(current_lag));
}
}
// Update mute_factor in BackgroundNoise class.
background_noise_->SetMuteFactor(channel_ix, bgn_mute_factor);
} else {
// BGN parameters have not been initialized; use zero noise.
memset(noise_vector, 0, sizeof(int16_t) * current_lag);
}
channel_parameters_[channel_ix].mute_slope,
TooManyExpands(),
current_lag,
unvoiced_array_memory);
// Add background noise to the combined voiced-unvoiced signal.
for (size_t i = 0; i < current_lag; i++) {
@@ -353,11 +272,8 @@ int Expand::Process(AudioMultiVector* output) {
}
// Increase call number and cap it.
++consecutive_expands_;
if (consecutive_expands_ > kMaxConsecutiveExpands) {
consecutive_expands_ = kMaxConsecutiveExpands;
}
consecutive_expands_ = consecutive_expands_ >= kMaxConsecutiveExpands ?
kMaxConsecutiveExpands : consecutive_expands_ + 1;
return 0;
}
@@ -373,6 +289,24 @@ void Expand::SetParametersForMergeAfterExpand() {
stop_muting_ = true;
}
void Expand::InitializeForAnExpandPeriod() {
lag_index_direction_ = 1;
current_lag_index_ = -1;
stop_muting_ = false;
random_vector_->set_seed_increment(1);
consecutive_expands_ = 0;
for (size_t ix = 0; ix < num_channels_; ++ix) {
channel_parameters_[ix].current_voice_mix_factor = 16384; // 1.0 in Q14.
channel_parameters_[ix].mute_factor = 16384; // 1.0 in Q14.
// Start with 0 gain for background noise.
background_noise_->SetMuteFactor(ix, 0);
}
}
bool Expand::TooManyExpands() {
return consecutive_expands_ >= kMaxConsecutiveExpands;
}
void Expand::AnalyzeSignal(int16_t* random_vector) {
int32_t auto_correlation[kUnvoicedLpcOrder + 1];
int16_t reflection_coeff[kUnvoicedLpcOrder];
@@ -400,18 +334,8 @@ void Expand::AnalyzeSignal(int16_t* random_vector) {
const int16_t* audio_history =
&(*sync_buffer_)[0][sync_buffer_->Size() - signal_length];
// Initialize some member variables.
lag_index_direction_ = 1;
current_lag_index_ = -1;
stop_muting_ = false;
random_vector_->set_seed_increment(1);
consecutive_expands_ = 0;
for (size_t ix = 0; ix < num_channels_; ++ix) {
channel_parameters_[ix].current_voice_mix_factor = 16384; // 1.0 in Q14.
channel_parameters_[ix].mute_factor = 16384; // 1.0 in Q14.
// Start with 0 gain for background noise.
background_noise_->SetMuteFactor(ix, 0);
}
// Initialize.
InitializeForAnExpandPeriod();
// Calculate correlation in downsampled domain (4 kHz sample rate).
int16_t correlation_scale;
@@ -873,5 +797,108 @@ Expand* ExpandFactory::Create(BackgroundNoise* background_noise,
num_channels);
}
// TODO(turajs): This can be moved to BackgroundNoise class.
void Expand::GenerateBackgroundNoise(int16_t* random_vector,
size_t channel,
int16_t mute_slope,
bool too_many_expands,
size_t num_noise_samples,
int16_t* buffer) {
static const int kNoiseLpcOrder = BackgroundNoise::kMaxLpcOrder;
int16_t scaled_random_vector[kMaxSampleRate / 8000 * 125];
assert(kMaxSampleRate / 8000 * 125 >= (int)num_noise_samples);
int16_t* noise_samples = &buffer[kNoiseLpcOrder];
if (background_noise_->initialized()) {
// Use background noise parameters.
memcpy(noise_samples - kNoiseLpcOrder,
background_noise_->FilterState(channel),
sizeof(int16_t) * kNoiseLpcOrder);
int dc_offset = 0;
if (background_noise_->ScaleShift(channel) > 1) {
dc_offset = 1 << (background_noise_->ScaleShift(channel) - 1);
}
// Scale random vector to correct energy level.
WebRtcSpl_AffineTransformVector(
scaled_random_vector, random_vector,
background_noise_->Scale(channel), dc_offset,
background_noise_->ScaleShift(channel),
static_cast<int>(num_noise_samples));
WebRtcSpl_FilterARFastQ12(scaled_random_vector, noise_samples,
background_noise_->Filter(channel),
kNoiseLpcOrder + 1,
static_cast<int>(num_noise_samples));
background_noise_->SetFilterState(
channel,
&(noise_samples[num_noise_samples - kNoiseLpcOrder]),
kNoiseLpcOrder);
// Unmute the background noise.
int16_t bgn_mute_factor = background_noise_->MuteFactor(channel);
NetEqBackgroundNoiseMode bgn_mode = background_noise_->mode();
if (bgn_mode == kBgnFade && too_many_expands && bgn_mute_factor > 0) {
// Fade BGN to zero.
// Calculate muting slope, approximately -2^18 / fs_hz.
int16_t mute_slope;
if (fs_hz_ == 8000) {
mute_slope = -32;
} else if (fs_hz_ == 16000) {
mute_slope = -16;
} else if (fs_hz_ == 32000) {
mute_slope = -8;
} else {
mute_slope = -5;
}
// Use UnmuteSignal function with negative slope.
// |bgn_mute_factor| is in Q14. |mute_slope| is in Q20.
DspHelper::UnmuteSignal(noise_samples,
num_noise_samples,
&bgn_mute_factor,
mute_slope,
noise_samples);
} else if (bgn_mute_factor < 16384) {
// If mode is kBgnOff, or if kBgnFade has started fading,
// use regular |mute_slope|.
if (!stop_muting_ && bgn_mode != kBgnOff &&
!(bgn_mode == kBgnFade && too_many_expands)) {
DspHelper::UnmuteSignal(noise_samples,
static_cast<int>(num_noise_samples),
&bgn_mute_factor,
mute_slope,
noise_samples);
} else {
// kBgnOn and stop muting, or
// kBgnOff (mute factor is always 0), or
// kBgnFade has reached 0.
WebRtcSpl_AffineTransformVector(noise_samples, noise_samples,
bgn_mute_factor, 8192, 14,
static_cast<int>(num_noise_samples));
}
}
// Update mute_factor in BackgroundNoise class.
background_noise_->SetMuteFactor(channel, bgn_mute_factor);
} else {
// BGN parameters have not been initialized; use zero noise.
memset(noise_samples, 0, sizeof(int16_t) * num_noise_samples);
}
}
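
The if/else ladder above follows the comment "approximately -2^18 / fs_hz"; integer division reproduces every branch. A quick equivalence sketch (hypothetical helper, not in the commit):

// -2^18 / fs_hz, truncated toward zero:
// 8000 -> -32, 16000 -> -16, 32000 -> -8, 48000 -> -5 (from -5.46).
int16_t MuteSlopeFor(int fs_hz) {
  return static_cast<int16_t>(-(1 << 18) / fs_hz);
}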
void Expand::GenerateRandomVector(int seed_increment,
size_t length,
int16_t* random_vector) {
// TODO(turajs): According to hlundin, the loop should not be needed; it
// should be just as good to generate all of the vector in one call.
size_t samples_generated = 0;
const size_t kMaxRandSamples = RandomVector::kRandomTableSize;
while (samples_generated < length) {
size_t rand_length = std::min(length - samples_generated, kMaxRandSamples);
random_vector_->IncreaseSeedIncrement(seed_increment);
random_vector_->Generate(rand_length, &random_vector[samples_generated]);
samples_generated += rand_length;
}
}
} // namespace webrtc
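
A usage sketch for the new helper, assuming RandomVector::kRandomTableSize is 256 in this tree (so longer requests are split into table-sized chunks):

// From within an Expand member: fill 480 samples (10 ms at 48 kHz);
// internally this takes two Generate() calls of 256 and 224 samples.
int16_t noise[480];
GenerateRandomVector(2, 480, noise);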

View File

@@ -36,12 +36,13 @@ class Expand {
RandomVector* random_vector,
int fs,
size_t num_channels)
: background_noise_(background_noise),
: random_vector_(random_vector),
sync_buffer_(sync_buffer),
random_vector_(random_vector),
first_expand_(true),
fs_hz_(fs),
num_channels_(num_channels),
consecutive_expands_(0),
background_noise_(background_noise),
overlap_length_(5 * fs / 8000),
lag_index_direction_(0),
current_lag_index_(0),
@@ -57,19 +58,19 @@ class Expand {
virtual ~Expand() {}
// Resets the object.
void Reset();
virtual void Reset();
// The main method to produce concealment data. The data is appended to the
// end of |output|.
int Process(AudioMultiVector* output);
virtual int Process(AudioMultiVector* output);
// Prepare the object to do extra expansion during normal operation following
// a period of expands.
void SetParametersForNormalAfterExpand();
virtual void SetParametersForNormalAfterExpand();
// Prepare the object to do extra expansion during merge operation following
// a period of expands.
void SetParametersForMergeAfterExpand();
virtual void SetParametersForMergeAfterExpand();
// Sets the mute factor for |channel| to |value|.
void SetMuteFactor(int16_t value, size_t channel) {
@@ -84,9 +85,38 @@ class Expand {
}
// Accessors and mutators.
size_t overlap_length() const { return overlap_length_; }
virtual size_t overlap_length() const { return overlap_length_; }
int16_t max_lag() const { return max_lag_; }
protected:
static const int kMaxConsecutiveExpands = 200;
void GenerateRandomVector(int seed_increment,
size_t length,
int16_t* random_vector);
void GenerateBackgroundNoise(int16_t* random_vector,
size_t channel,
int16_t mute_slope,
bool too_many_expands,
size_t num_noise_samples,
int16_t* buffer);
// Initializes member variables at the beginning of an expand period.
void InitializeForAnExpandPeriod();
bool TooManyExpands();
// Analyzes the signal history in |sync_buffer_|, and sets up all parameters
// necessary to produce concealment data.
void AnalyzeSignal(int16_t* random_vector);
RandomVector* random_vector_;
SyncBuffer* sync_buffer_;
bool first_expand_;
const int fs_hz_;
const size_t num_channels_;
int consecutive_expands_;
private:
static const int kUnvoicedLpcOrder = 6;
static const int kNumCorrelationCandidates = 3;
@@ -94,7 +124,6 @@ class Expand {
static const int kLpcAnalysisLength = 160;
static const int kMaxSampleRate = 48000;
static const int kNumLags = 3;
static const int kMaxConsecutiveExpands = 200;
struct ChannelParameters {
// Constructor.
@@ -122,10 +151,6 @@ class Expand {
int16_t mute_slope; /* Q20 */
};
// Analyze the signal history in |sync_buffer_|, and set up all parameters
// necessary to produce concealment data.
void AnalyzeSignal(int16_t* random_vector);
// Calculate the auto-correlation of |input|, with length |input_length|
// samples. The correlation is calculated from a downsampled version of
// |input|, and is written to |output|. The scale factor is written to
@@ -136,13 +161,7 @@ class Expand {
void UpdateLagIndex();
BackgroundNoise* background_noise_;
SyncBuffer* sync_buffer_;
RandomVector* random_vector_;
bool first_expand_;
const int fs_hz_;
const size_t num_channels_;
const size_t overlap_length_;
int consecutive_expands_;
int16_t max_lag_;
size_t expand_lags_[kNumLags];
int lag_index_direction_;

View File

@@ -108,7 +108,8 @@ class NetEq {
// Creates a new NetEq object, starting at the sample rate |sample_rate_hz|.
// (Note that it will still change the sample rate depending on what payloads
// are being inserted; |sample_rate_hz| is just for startup configuration.)
static NetEq* Create(int sample_rate_hz);
static NetEq* Create(int sample_rate_hz,
bool enable_audio_classifier = false);
virtual ~NetEq() {}
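
Since the new argument is defaulted, existing call sites compile unchanged. A usage sketch:

// Both forms are valid after this change.
NetEq* neteq_plain = NetEq::Create(16000);        // Classifier not requested.
NetEq* neteq_music = NetEq::Create(16000, true);  // Classifier requested.
delete neteq_plain;
delete neteq_music;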

View File

@@ -20,6 +20,7 @@
#include "webrtc/modules/audio_coding/neteq4/dsp_helper.h"
#include "webrtc/modules/audio_coding/neteq4/expand.h"
#include "webrtc/modules/audio_coding/neteq4/sync_buffer.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
namespace webrtc {
@@ -307,9 +308,11 @@ int16_t Merge::CorrelateAndPeakSearch(int16_t expanded_max, int16_t input_max,
stop_position_downsamp, correlation_shift, 1);
// Normalize correlation to 14 bits and copy to a 16-bit array.
static const int kPadLength = 4;
int16_t correlation16[kPadLength + kMaxCorrelationLength + kPadLength] = {0};
int16_t* correlation_ptr = &correlation16[kPadLength];
const int pad_length = static_cast<int>(expand_->overlap_length() - 1);
const int correlation_buffer_size = 2 * pad_length + kMaxCorrelationLength;
scoped_ptr<int16_t[]> correlation16(new int16_t[correlation_buffer_size]);
memset(correlation16.get(), 0, correlation_buffer_size * sizeof(int16_t));
int16_t* correlation_ptr = &correlation16[pad_length];
int32_t max_correlation = WebRtcSpl_MaxAbsValueW32(correlation,
stop_position_downsamp);
int16_t norm_shift = std::max(0, 17 - WebRtcSpl_NormW32(max_correlation));
@@ -332,7 +335,7 @@ int16_t Merge::CorrelateAndPeakSearch(int16_t expanded_max, int16_t input_max,
// start index |start_index_downsamp| and the effective array length.
int modified_stop_pos =
std::min(stop_position_downsamp,
kMaxCorrelationLength + kPadLength - start_index_downsamp);
kMaxCorrelationLength + pad_length - start_index_downsamp);
int best_correlation_index;
int16_t best_correlation;
static const int kNumCorrelationCandidates = 1;
@@ -355,4 +358,9 @@ int16_t Merge::CorrelateAndPeakSearch(int16_t expanded_max, int16_t input_max,
return best_correlation_index;
}
int Merge::RequiredFutureSamples() {
return static_cast<int>(fs_hz_ / 100 * num_channels_); // 10 ms.
}
} // namespace webrtc
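
A worked check of the 10 ms contract above (rates chosen for illustration):

// fs_hz_ / 100 * num_channels_:
//   8000 Hz mono   ->  80 samples
//  48000 Hz stereo -> 960 samples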

View File

@@ -35,8 +35,8 @@ class Merge {
public:
Merge(int fs_hz, size_t num_channels, Expand* expand, SyncBuffer* sync_buffer)
: fs_hz_(fs_hz),
fs_mult_(fs_hz_ / 8000),
num_channels_(num_channels),
fs_mult_(fs_hz_ / 8000),
timestamps_per_call_(fs_hz_ / 100),
expand_(expand),
sync_buffer_(sync_buffer),
@@ -44,6 +44,8 @@ class Merge {
assert(num_channels_ > 0);
}
virtual ~Merge() {}
// The main method to produce the audio data. The decoded data is supplied in
// |input|, having |input_length| samples in total for all channels
// (interleaved). The result is written to |output|. The number of channels
@@ -51,10 +53,16 @@ class Merge {
// de-interleaving |input|. The values in |external_mute_factor_array| (Q14)
// will be used to scale the audio, and is updated in the process. The array
// must have |num_channels_| elements.
int Process(int16_t* input, size_t input_length,
virtual int Process(int16_t* input, size_t input_length,
int16_t* external_mute_factor_array,
AudioMultiVector* output);
virtual int RequiredFutureSamples();
protected:
const int fs_hz_;
const size_t num_channels_;
private:
static const int kMaxSampleRate = 48000;
static const int kExpandDownsampLength = 100;
@@ -87,9 +95,7 @@ class Merge {
int start_position, int input_length,
int expand_period) const;
const int fs_hz_;
const int fs_mult_; // fs_hz_ / 8000.
const size_t num_channels_;
const int timestamps_per_call_;
Expand* expand_;
SyncBuffer* sync_buffer_;

View File

@@ -28,7 +28,7 @@ namespace webrtc {
// Creates all classes needed and inject them into a new NetEqImpl object.
// Return the new object.
NetEq* NetEq::Create(int sample_rate_hz) {
NetEq* NetEq::Create(int sample_rate_hz, bool enable_audio_classifier) {
BufferLevelFilter* buffer_level_filter = new BufferLevelFilter;
DecoderDatabase* decoder_database = new DecoderDatabase;
DelayPeakDetector* delay_peak_detector = new DelayPeakDetector;

View File

@@ -61,7 +61,8 @@ NetEqImpl::NetEqImpl(int fs,
TimestampScaler* timestamp_scaler,
AccelerateFactory* accelerate_factory,
ExpandFactory* expand_factory,
PreemptiveExpandFactory* preemptive_expand_factory)
PreemptiveExpandFactory* preemptive_expand_factory,
bool create_components)
: buffer_level_filter_(buffer_level_filter),
decoder_database_(decoder_database),
delay_manager_(delay_manager),
@@ -103,13 +104,9 @@ NetEqImpl::NetEqImpl(int fs,
output_size_samples_ = kOutputSizeMs * 8 * fs_mult_;
decoder_frame_length_ = 3 * output_size_samples_;
WebRtcSpl_Init();
decision_logic_.reset(DecisionLogic::Create(fs_hz_, output_size_samples_,
kPlayoutOn,
decoder_database_.get(),
*packet_buffer_.get(),
delay_manager_.get(),
buffer_level_filter_.get()));
if (create_components) {
SetSampleRateAndChannels(fs, 1); // Default is 1 channel.
}
}
NetEqImpl::~NetEqImpl() {
@@ -284,12 +281,7 @@ void NetEqImpl::SetPlayoutMode(NetEqPlayoutMode mode) {
CriticalSectionScoped lock(crit_sect_.get());
if (!decision_logic_.get() || mode != decision_logic_->playout_mode()) {
// The reset() method calls delete for the old object.
decision_logic_.reset(DecisionLogic::Create(fs_hz_, output_size_samples_,
mode,
decoder_database_.get(),
*packet_buffer_.get(),
delay_manager_.get(),
buffer_level_filter_.get()));
CreateDecisionLogic(mode);
}
}
@@ -948,7 +940,7 @@ int NetEqImpl::GetDecision(Operations* operation,
return 0;
}
decision_logic_->ExpandDecision(*operation == kExpand);
decision_logic_->ExpandDecision(*operation);
// Check conditions for reset.
if (new_codec_ || *operation == kUndefined) {
@@ -1067,6 +1059,11 @@ int NetEqImpl::GetDecision(Operations* operation,
// Move on with the preemptive expand decision.
break;
}
case kMerge: {
required_samples =
std::max(merge_->RequiredFutureSamples(), required_samples);
break;
}
default: {
// Do nothing.
}
@@ -1834,6 +1831,14 @@ int NetEqImpl::ExtractPackets(int required_samples, PacketList* packet_list) {
return extracted_samples;
}
void NetEqImpl::UpdatePlcComponents(int fs_hz, size_t channels) {
// Delete objects and create new ones.
expand_.reset(expand_factory_->Create(background_noise_.get(),
sync_buffer_.get(), &random_vector_,
fs_hz, channels));
merge_.reset(new Merge(fs_hz, channels, expand_.get(), sync_buffer_.get()));
}
void NetEqImpl::SetSampleRateAndChannels(int fs_hz, size_t channels) {
LOG_API2(fs_hz, channels);
// TODO(hlundin): Change to an enumerator and skip assert.
@@ -1881,21 +1886,20 @@ void NetEqImpl::SetSampleRateAndChannels(int fs_hz, size_t channels) {
// Reset random vector.
random_vector_.Reset();
// Delete Expand object and create a new one.
expand_.reset(expand_factory_->Create(background_noise_.get(),
sync_buffer_.get(), &random_vector_,
fs_hz, channels));
UpdatePlcComponents(fs_hz, channels);
// Move index so that we create a small set of future samples (all 0).
sync_buffer_->set_next_index(sync_buffer_->next_index() -
expand_->overlap_length());
normal_.reset(new Normal(fs_hz, decoder_database_.get(), *background_noise_,
expand_.get()));
merge_.reset(new Merge(fs_hz, channels, expand_.get(), sync_buffer_.get()));
accelerate_.reset(
accelerate_factory_->Create(fs_hz, channels, *background_noise_));
preemptive_expand_.reset(
preemptive_expand_factory_->Create(fs_hz, channels, *background_noise_));
preemptive_expand_.reset(preemptive_expand_factory_->Create(
fs_hz, channels,
*background_noise_,
static_cast<int>(expand_->overlap_length())));
// Delete ComfortNoise object and create a new one.
comfort_noise_.reset(new ComfortNoise(fs_hz, decoder_database_.get(),
@@ -1908,8 +1912,11 @@ void NetEqImpl::SetSampleRateAndChannels(int fs_hz, size_t channels) {
decoded_buffer_.reset(new int16_t[decoded_buffer_length_]);
}
// Communicate new sample rate and output size to DecisionLogic object.
assert(decision_logic_.get());
// Create DecisionLogic if it is not created yet, then communicate new sample
// rate and output size to DecisionLogic object.
if (!decision_logic_.get()) {
CreateDecisionLogic(kPlayoutOn);
}
decision_logic_->SetSampleRate(fs_hz_, output_size_samples_);
}
@@ -1930,4 +1937,12 @@ NetEqOutputType NetEqImpl::LastOutputType() {
}
}
void NetEqImpl::CreateDecisionLogic(NetEqPlayoutMode mode) {
decision_logic_.reset(DecisionLogic::Create(fs_hz_, output_size_samples_,
mode,
decoder_database_.get(),
*packet_buffer_.get(),
delay_manager_.get(),
buffer_level_filter_.get()));
}
} // namespace webrtc

View File

@@ -70,7 +70,8 @@ class NetEqImpl : public webrtc::NetEq {
TimestampScaler* timestamp_scaler,
AccelerateFactory* accelerate_factory,
ExpandFactory* expand_factory,
PreemptiveExpandFactory* preemptive_expand_factory);
PreemptiveExpandFactory* preemptive_expand_factory,
bool create_components = true);
virtual ~NetEqImpl();
@@ -203,7 +204,7 @@ class NetEqImpl : public webrtc::NetEq {
// This accessor method is only intended for testing purposes.
virtual const SyncBuffer* sync_buffer_for_test() const;
private:
protected:
static const int kOutputSizeMs = 10;
static const int kMaxFrameSize = 2880; // 60 ms @ 48 kHz.
// TODO(hlundin): Provide a better value for kSyncBufferSize.
@@ -331,6 +332,14 @@ class NetEqImpl : public webrtc::NetEq {
// GetAudio().
NetEqOutputType LastOutputType() EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
// Updates Expand and Merge.
virtual void UpdatePlcComponents(int fs_hz, size_t channels)
EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
// Creates DecisionLogic object for the given mode.
void CreateDecisionLogic(NetEqPlayoutMode mode)
EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
const scoped_ptr<BufferLevelFilter> buffer_level_filter_;
const scoped_ptr<DecoderDatabase> decoder_database_;
const scoped_ptr<DelayManager> delay_manager_;
@@ -388,6 +397,7 @@ class NetEqImpl : public webrtc::NetEq {
int decoded_packet_sequence_number_ GUARDED_BY(crit_sect_);
uint32_t decoded_packet_timestamp_ GUARDED_BY(crit_sect_);
private:
DISALLOW_COPY_AND_ASSIGN(NetEqImpl);
};

View File

@@ -49,6 +49,7 @@
'CODEC_PCM16B_WB',
'CODEC_ISAC_SWB',
'CODEC_PCM16B_32KHZ',
'CODEC_PCM16B_48KHZ',
'CODEC_CNGCODEC8',
'CODEC_CNGCODEC16',
'CODEC_CNGCODEC32',

View File

@@ -18,6 +18,7 @@
#include <stdlib.h>
#include <string.h> // memset
#include <algorithm>
#include <set>
#include <string>
#include <vector>
@@ -232,6 +233,7 @@ class NetEqDecodingTest : public ::testing::Test {
unsigned int sim_clock_;
int16_t out_data_[kMaxBlockSize];
int output_sample_rate_;
int algorithmic_delay_ms_;
};
// Allocating the static const so that it can be passed by reference.
@@ -246,12 +248,16 @@ NetEqDecodingTest::NetEqDecodingTest()
: neteq_(NULL),
rtp_fp_(NULL),
sim_clock_(0),
output_sample_rate_(kInitSampleRateHz) {
output_sample_rate_(kInitSampleRateHz),
algorithmic_delay_ms_(0) {
memset(out_data_, 0, sizeof(out_data_));
}
void NetEqDecodingTest::SetUp() {
neteq_ = NetEq::Create(kInitSampleRateHz);
NetEqNetworkStatistics stat;
ASSERT_EQ(0, neteq_->NetworkStatistics(&stat));
algorithmic_delay_ms_ = stat.current_buffer_size_ms;
ASSERT_TRUE(neteq_);
LoadDecoders();
}
@@ -483,8 +489,8 @@ void NetEqDecodingTest::CheckBgnOff(int sampling_rate_hz,
ASSERT_EQ(expected_samples_per_channel, samples_per_channel);
// To be able to test the fading of background noise we need at least to pull
// 610 frames.
const int kFadingThreshold = 610;
// 611 frames.
const int kFadingThreshold = 611;
// Test several CNG-to-PLC packets for the expected behavior. The number 20
// is arbitrary, but large enough to test a sufficient number of frames.
@@ -1110,12 +1116,16 @@ TEST_F(NetEqDecodingTest, DISABLED_ON_ANDROID(SyncPacketInsert)) {
// First insert several noise like packets, then sync-packets. Decoding all
// packets should not produce error, statistics should not show any packet loss
// and sync-packets should decode to zero.
// TODO(turajs): We will have a better test if we have a reference NetEq; when
// sync packets are inserted in the "test" NetEq, we insert all-zero payloads
// in the reference NetEq and compare the outputs of the two.
TEST_F(NetEqDecodingTest, DISABLED_ON_ANDROID(SyncPacketDecode)) {
WebRtcRTPHeader rtp_info;
PopulateRtpInfo(0, 0, &rtp_info);
const int kPayloadBytes = kBlockSize16kHz * sizeof(int16_t);
uint8_t payload[kPayloadBytes];
int16_t decoded[kBlockSize16kHz];
int algorithmic_frame_delay = algorithmic_delay_ms_ / 10 + 1;
for (int n = 0; n < kPayloadBytes; ++n) {
payload[n] = (rand() & 0xF0) + 1; // Non-zero random sequence.
}
@@ -1125,7 +1135,6 @@ TEST_F(NetEqDecodingTest, DISABLED_ON_ANDROID(SyncPacketDecode)) {
int num_channels;
int samples_per_channel;
uint32_t receive_timestamp = 0;
int delay_samples = 0;
for (int n = 0; n < 100; ++n) {
ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, kPayloadBytes,
receive_timestamp));
@@ -1135,16 +1144,15 @@ TEST_F(NetEqDecodingTest, DISABLED_ON_ANDROID(SyncPacketDecode)) {
ASSERT_EQ(kBlockSize16kHz, samples_per_channel);
ASSERT_EQ(1, num_channels);
// Even if there is RTP packet in NetEq's buffer, the first frame pulled
// from NetEq starts with few zero samples. Here we measure this delay.
if (n == 0) {
while (decoded[delay_samples] == 0) delay_samples++;
}
rtp_info.header.sequenceNumber++;
rtp_info.header.timestamp += kBlockSize16kHz;
receive_timestamp += kBlockSize16kHz;
}
const int kNumSyncPackets = 10;
// Make sure a sufficient number of sync packets are inserted so that we
// can conduct the test.
ASSERT_GT(kNumSyncPackets, algorithmic_frame_delay);
// Insert sync-packets, the decoded sequence should be all-zero.
for (int n = 0; n < kNumSyncPackets; ++n) {
ASSERT_EQ(0, neteq_->InsertSyncPacket(rtp_info, receive_timestamp));
@@ -1153,30 +1161,37 @@ TEST_F(NetEqDecodingTest, DISABLED_ON_ANDROID(SyncPacketDecode)) {
&output_type));
ASSERT_EQ(kBlockSize16kHz, samples_per_channel);
ASSERT_EQ(1, num_channels);
EXPECT_TRUE(IsAllZero(&decoded[delay_samples],
samples_per_channel * num_channels - delay_samples));
delay_samples = 0; // Delay only matters in the first frame.
if (n > algorithmic_frame_delay) {
EXPECT_TRUE(IsAllZero(decoded, samples_per_channel * num_channels));
}
rtp_info.header.sequenceNumber++;
rtp_info.header.timestamp += kBlockSize16kHz;
receive_timestamp += kBlockSize16kHz;
}
// We insert a regular packet, if sync packet are not correctly buffered then
// We insert regular packets; if sync packets are not correctly buffered, then
// network statistics would show some packet loss.
for (int n = 0; n <= algorithmic_frame_delay + 10; ++n) {
ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, kPayloadBytes,
receive_timestamp));
ASSERT_EQ(0, neteq_->GetAudio(kBlockSize16kHz, decoded,
&samples_per_channel, &num_channels,
&output_type));
// Make sure the last inserted packet is decoded and there are non-zero
// samples.
EXPECT_FALSE(IsAllZero(decoded, samples_per_channel * num_channels));
if (n >= algorithmic_frame_delay + 1) {
// Expect that this frame contains samples from regular RTP.
EXPECT_TRUE(IsAllNonZero(decoded, samples_per_channel * num_channels));
}
rtp_info.header.sequenceNumber++;
rtp_info.header.timestamp += kBlockSize16kHz;
receive_timestamp += kBlockSize16kHz;
}
NetEqNetworkStatistics network_stats;
ASSERT_EQ(0, neteq_->NetworkStatistics(&network_stats));
// Expecting a "clean" network.
EXPECT_EQ(0, network_stats.packet_loss_rate);
EXPECT_EQ(0, network_stats.expand_rate);
EXPECT_EQ(0, network_stats.accelerate_rate);
EXPECT_EQ(0, network_stats.preemptive_rate);
EXPECT_LE(network_stats.preemptive_rate, 150);
}
// Test if the size of the packet buffer is reported correctly when containing
@@ -1199,7 +1214,8 @@ TEST_F(NetEqDecodingTest,
int num_channels;
int samples_per_channel;
uint32_t receive_timestamp = 0;
for (int n = 0; n < 1; ++n) {
int algorithmic_frame_delay = algorithmic_delay_ms_ / 10 + 1;
for (int n = 0; n < algorithmic_frame_delay; ++n) {
ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, kPayloadBytes,
receive_timestamp));
ASSERT_EQ(0, neteq_->GetAudio(kBlockSize16kHz, decoded,
@@ -1225,7 +1241,8 @@ TEST_F(NetEqDecodingTest,
}
NetEqNetworkStatistics network_stats;
ASSERT_EQ(0, neteq_->NetworkStatistics(&network_stats));
EXPECT_EQ(kNumSyncPackets * 10, network_stats.current_buffer_size_ms);
EXPECT_EQ(kNumSyncPackets * 10 + algorithmic_delay_ms_,
network_stats.current_buffer_size_ms);
// Rewind |rtp_info| to that of the first sync packet.
memcpy(&rtp_info, &first_sync_packet_rtp_info, sizeof(rtp_info));
@@ -1298,7 +1315,8 @@ void NetEqDecodingTest::WrapTest(uint16_t start_seq_no,
if (packets_inserted > 4) {
// Expect preferred and actual buffer size to be no more than 2 frames.
EXPECT_LE(network_stats.preferred_buffer_size_ms, kFrameSizeMs * 2);
EXPECT_LE(network_stats.current_buffer_size_ms, kFrameSizeMs * 2);
EXPECT_LE(network_stats.current_buffer_size_ms, kFrameSizeMs * 2 +
algorithmic_delay_ms_);
}
last_seq_no = seq_no;
last_timestamp = timestamp;
@@ -1362,6 +1380,8 @@ void NetEqDecodingTest::DuplicateCng() {
const int kSamples = kFrameSizeMs * kSampleRateKhz;
const int kPayloadBytes = kSamples * 2;
const int algorithmic_delay_samples = std::max(
algorithmic_delay_ms_ * kSampleRateKhz, 5 * kSampleRateKhz / 8);
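// Note: at kSampleRateKhz = 16 the fallback term is 5 * 16 / 8 = 10, which
// matches the literal "timestamp - 10" these expectations used before.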
// Insert three speech packets. Three are needed to get the frame length
// correct.
int out_len;
@@ -1398,7 +1418,7 @@ void NetEqDecodingTest::DuplicateCng() {
kMaxBlockSize, out_data_, &out_len, &num_channels, &type));
ASSERT_EQ(kBlockSize16kHz, out_len);
EXPECT_EQ(kOutputCNG, type);
EXPECT_EQ(timestamp - 10, neteq_->PlayoutTimestamp());
EXPECT_EQ(timestamp - algorithmic_delay_samples, neteq_->PlayoutTimestamp());
// Insert the same CNG packet again. Note that at this point it is old, since
// we have already decoded the first copy of it.
@@ -1412,7 +1432,8 @@ void NetEqDecodingTest::DuplicateCng() {
kMaxBlockSize, out_data_, &out_len, &num_channels, &type));
ASSERT_EQ(kBlockSize16kHz, out_len);
EXPECT_EQ(kOutputCNG, type);
EXPECT_EQ(timestamp - 10, neteq_->PlayoutTimestamp());
EXPECT_EQ(timestamp - algorithmic_delay_samples,
neteq_->PlayoutTimestamp());
}
// Insert speech again.
@@ -1427,7 +1448,8 @@ void NetEqDecodingTest::DuplicateCng() {
kMaxBlockSize, out_data_, &out_len, &num_channels, &type));
ASSERT_EQ(kBlockSize16kHz, out_len);
EXPECT_EQ(kOutputNormal, type);
EXPECT_EQ(timestamp + kSamples - 10, neteq_->PlayoutTimestamp());
EXPECT_EQ(timestamp + kSamples - algorithmic_delay_samples,
neteq_->PlayoutTimestamp());
}
TEST_F(NetEqDecodingTest, DiscardDuplicateCng) { DuplicateCng(); }

View File

@@ -101,8 +101,10 @@ PreemptiveExpand::ReturnCodes PreemptiveExpand::CheckCriteriaAndStretch(
PreemptiveExpand* PreemptiveExpandFactory::Create(
int sample_rate_hz,
size_t num_channels,
const BackgroundNoise& background_noise) const {
return new PreemptiveExpand(sample_rate_hz, num_channels, background_noise);
const BackgroundNoise& background_noise,
int overlap_samples) const {
return new PreemptiveExpand(
sample_rate_hz, num_channels, background_noise, overlap_samples);
}
} // namespace webrtc

View File

@@ -29,11 +29,13 @@ class BackgroundNoise;
// PreemptiveExpand are implemented.
class PreemptiveExpand : public TimeStretch {
public:
PreemptiveExpand(int sample_rate_hz, size_t num_channels,
const BackgroundNoise& background_noise)
PreemptiveExpand(int sample_rate_hz,
size_t num_channels,
const BackgroundNoise& background_noise,
int overlap_samples)
: TimeStretch(sample_rate_hz, num_channels, background_noise),
old_data_length_per_channel_(-1),
overlap_samples_(5 * sample_rate_hz / 8000) {
overlap_samples_(overlap_samples) {
}
virtual ~PreemptiveExpand() {}
@@ -77,7 +79,8 @@ struct PreemptiveExpandFactory {
virtual PreemptiveExpand* Create(
int sample_rate_hz,
size_t num_channels,
const BackgroundNoise& background_noise) const;
const BackgroundNoise& background_noise,
int overlap_samples) const;
};
} // namespace webrtc
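
Callers now supply the overlap that the constructor used to compute inline; NetEqImpl (earlier in this commit) passes expand_->overlap_length(), which follows the same formula. The values, for reference:

// Old inline formula, now the caller's responsibility:
// 5 * sample_rate_hz / 8000 -> 5 @ 8 kHz, 10 @ 16 kHz, 30 @ 48 kHz.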

View File

@@ -78,7 +78,8 @@ class SyncBuffer : public AudioMultiVector {
// created.
void Flush();
const AudioVector& Channel(size_t n) { return *channels_[n]; }
const AudioVector& Channel(size_t n) const { return *channels_[n]; }
AudioVector& Channel(size_t n) { return *channels_[n]; }
// Accessors and mutators.
size_t next_index() const { return next_index_; }
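
The const/non-const pair lets read-only code keep const-correctness while mutating code gets a writable reference. A minimal sketch (hypothetical free functions):

void InspectChannel(const SyncBuffer& sb) {
  const AudioVector& ch = sb.Channel(0);  // Binds to the const overload.
}
void EditChannel(SyncBuffer* sb) {
  AudioVector& ch = sb->Channel(0);  // Binds to the non-const overload.
}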

View File

@@ -21,14 +21,17 @@ namespace webrtc {
TEST(TimeStretch, CreateAndDestroy) {
const int kSampleRate = 8000;
const size_t kNumChannels = 1;
const int kOverlapSamples = 5 * kSampleRate / 8000;
BackgroundNoise bgn(kNumChannels);
Accelerate accelerate(kSampleRate, kNumChannels, bgn);
PreemptiveExpand preemptive_expand(kSampleRate, kNumChannels, bgn);
PreemptiveExpand preemptive_expand(
kSampleRate, kNumChannels, bgn, kOverlapSamples);
}
TEST(TimeStretch, CreateUsingFactory) {
const int kSampleRate = 8000;
const size_t kNumChannels = 1;
const int kOverlapSamples = 5 * kSampleRate / 8000;
BackgroundNoise bgn(kNumChannels);
AccelerateFactory accelerate_factory;
@@ -38,8 +41,8 @@ TEST(TimeStretch, CreateUsingFactory) {
delete accelerate;
PreemptiveExpandFactory preemptive_expand_factory;
PreemptiveExpand* preemptive_expand =
preemptive_expand_factory.Create(kSampleRate, kNumChannels, bgn);
PreemptiveExpand* preemptive_expand = preemptive_expand_factory.Create(
kSampleRate, kNumChannels, bgn, kOverlapSamples);
EXPECT_TRUE(preemptive_expand != NULL);
delete preemptive_expand;
}