diff --git a/webrtc/modules/audio_coding/main/acm2/acm_receiver_unittest.cc b/webrtc/modules/audio_coding/main/acm2/acm_receiver_unittest.cc
index 712eeb268..d39a1dc7e 100644
--- a/webrtc/modules/audio_coding/main/acm2/acm_receiver_unittest.cc
+++ b/webrtc/modules/audio_coding/main/acm2/acm_receiver_unittest.cc
@@ -302,55 +302,6 @@ TEST_F(AcmReceiverTest, DISABLED_ON_ANDROID(PostdecodingVad)) {
   EXPECT_EQ(AudioFrame::kVadUnknown, frame.vad_activity_);
 }
 
-TEST_F(AcmReceiverTest, DISABLED_ON_ANDROID(FlushBuffer)) {
-  const int id = ACMCodecDB::kISAC;
-  EXPECT_EQ(0, receiver_->AddCodec(id, codecs_[id].pltype, codecs_[id].channels,
-                                   NULL));
-  const int kNumPackets = 5;
-  const int num_10ms_frames = codecs_[id].pacsize / (codecs_[id].plfreq / 100);
-  for (int n = 0; n < kNumPackets; ++n)
-    InsertOnePacketOfSilence(id);
-  ACMNetworkStatistics statistics;
-  receiver_->NetworkStatistics(&statistics);
-  ASSERT_EQ(num_10ms_frames * kNumPackets * 10, statistics.currentBufferSize);
-
-  receiver_->FlushBuffers();
-  receiver_->NetworkStatistics(&statistics);
-  ASSERT_EQ(0, statistics.currentBufferSize);
-}
-
-TEST_F(AcmReceiverTest, DISABLED_ON_ANDROID(PlayoutTimestamp)) {
-  const int id = ACMCodecDB::kPCM16Bwb;
-  EXPECT_EQ(0, receiver_->AddCodec(id, codecs_[id].pltype, codecs_[id].channels,
-                                   NULL));
-  receiver_->SetPlayoutMode(fax);
-  const int kNumPackets = 5;
-  const int num_10ms_frames = codecs_[id].pacsize / (codecs_[id].plfreq / 100);
-  uint32_t expected_timestamp;
-  AudioFrame frame;
-  int ts_offset = 0;
-  bool first_audio_frame = true;
-  for (int n = 0; n < kNumPackets; ++n) {
-    packet_sent_ = false;
-    InsertOnePacketOfSilence(id);
-    ASSERT_TRUE(packet_sent_);
-    expected_timestamp = last_packet_send_timestamp_;
-    for (int k = 0; k < num_10ms_frames; ++k) {
-      ASSERT_EQ(0, receiver_->GetAudio(codecs_[id].plfreq, &frame));
-      if (first_audio_frame) {
-        // There is an offset in playout timestamps. Perhaps, it is related to
-        // initial delay that NetEq applies.
-        ts_offset = receiver_->PlayoutTimestamp() - expected_timestamp;
-        first_audio_frame = false;
-      } else {
-        EXPECT_EQ(expected_timestamp + ts_offset,
-                  receiver_->PlayoutTimestamp());
-      }
-      expected_timestamp += codecs_[id].plfreq / 100;  // Increment by 10 ms.
-    }
-  }
-}
-
 TEST_F(AcmReceiverTest, DISABLED_ON_ANDROID(LastAudioCodec)) {
   const int kCodecId[] = {
       ACMCodecDB::kISAC, ACMCodecDB::kPCMA, ACMCodecDB::kISACSWB,
diff --git a/webrtc/modules/audio_coding/neteq4/audio_classifier.h b/webrtc/modules/audio_coding/neteq4/audio_classifier.h
index e64e110d8..1d041b301 100644
--- a/webrtc/modules/audio_coding/neteq4/audio_classifier.h
+++ b/webrtc/modules/audio_coding/neteq4/audio_classifier.h
@@ -41,7 +41,7 @@ class AudioClassifier {
   bool Analysis(const int16_t* input, int input_length, int channels);
 
   // Gets the current classification : true = music, false = speech.
-  bool is_music() const { return is_music_; }
+  virtual bool is_music() const { return is_music_; }
 
   // Gets the current music probability.
   float music_probability() const { return music_probability_; }
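
Note: the audio_classifier.h change is a single keyword — is_music() becomes
virtual — which is the usual way of opening a test seam. A sketch of the kind
of test double this permits (FakeAudioClassifier is a hypothetical name, and
AudioClassifier is assumed to be default-constructible; neither is part of
this CL):

    // Hypothetical test double built on the newly virtual accessor.
    class FakeAudioClassifier : public webrtc::AudioClassifier {
     public:
      explicit FakeAudioClassifier(bool music) : music_(music) {}
      virtual bool is_music() const { return music_; }  // Canned answer.

     private:
      bool music_;
    };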
diff --git a/webrtc/modules/audio_coding/neteq4/decision_logic.cc b/webrtc/modules/audio_coding/neteq4/decision_logic.cc
index 04b886a2e..85edbb4dc 100644
--- a/webrtc/modules/audio_coding/neteq4/decision_logic.cc
+++ b/webrtc/modules/audio_coding/neteq4/decision_logic.cc
@@ -145,8 +145,8 @@ Operations DecisionLogic::GetDecision(const SyncBuffer& sync_buffer,
                         reset_decoder);
 }
 
-void DecisionLogic::ExpandDecision(bool is_expand_decision) {
-  if (is_expand_decision) {
+void DecisionLogic::ExpandDecision(Operations operation) {
+  if (operation == kExpand) {
     num_consecutive_expands_++;
   } else {
     num_consecutive_expands_ = 0;
diff --git a/webrtc/modules/audio_coding/neteq4/decision_logic.h b/webrtc/modules/audio_coding/neteq4/decision_logic.h
index aca5ca405..e187449b6 100644
--- a/webrtc/modules/audio_coding/neteq4/decision_logic.h
+++ b/webrtc/modules/audio_coding/neteq4/decision_logic.h
@@ -92,7 +92,7 @@ class DecisionLogic {
   // not. Note that this is necessary, since an expand decision can be changed
   // to kNormal in NetEqImpl::GetDecision if there is still enough data in the
   // sync buffer.
-  void ExpandDecision(bool is_expand_decision);
+  virtual void ExpandDecision(Operations operation);
 
   // Adds |value| to |sample_memory_|.
   void AddSampleMemory(int32_t value) {
diff --git a/webrtc/modules/audio_coding/neteq4/decision_logic_normal.h b/webrtc/modules/audio_coding/neteq4/decision_logic_normal.h
index 783b001fc..8ec60349f 100644
--- a/webrtc/modules/audio_coding/neteq4/decision_logic_normal.h
+++ b/webrtc/modules/audio_coding/neteq4/decision_logic_normal.h
@@ -38,6 +38,10 @@ class DecisionLogicNormal : public DecisionLogic {
   virtual ~DecisionLogicNormal() {}
 
  protected:
+  static const int kAllowMergeWithoutExpandMs = 20;  // 20 ms.
+  static const int kReinitAfterExpands = 100;
+  static const int kMaxWaitForPacket = 10;
+
   // Returns the operation that should be done next. |sync_buffer| and |expand|
   // are provided for reference. |decoder_frame_length| is the number of samples
   // obtained from the last decoded frame. If there is a packet available, the
@@ -54,32 +58,29 @@ class DecisionLogicNormal : public DecisionLogic {
                                        Modes prev_mode, bool play_dtmf,
                                        bool* reset_decoder);
 
- private:
-  static const int kAllowMergeWithoutExpandMs = 20;  // 20 ms.
-  static const int kReinitAfterExpands = 100;
-  static const int kMaxWaitForPacket = 10;
+  // Returns the operation to do given that the expected packet is not
+  // available, but a packet further into the future is at hand.
+  virtual Operations FuturePacketAvailable(
+      const SyncBuffer& sync_buffer,
+      const Expand& expand,
+      int decoder_frame_length, Modes prev_mode,
+      uint32_t target_timestamp,
+      uint32_t available_timestamp,
+      bool play_dtmf);
 
+  // Returns the operation to do given that the expected packet is available.
+  virtual Operations ExpectedPacketAvailable(Modes prev_mode, bool play_dtmf);
+
+  // Returns the operation given that no packets are available (except maybe
+  // a DTMF event, flagged by setting |play_dtmf| true).
+  virtual Operations NoPacket(bool play_dtmf);
+
+ private:
   // Returns the operation given that the next available packet is a comfort
   // noise payload (RFC 3389 only, not codec-internal).
   Operations CngOperation(Modes prev_mode, uint32_t target_timestamp,
                           uint32_t available_timestamp);
 
-  // Returns the operation given that no packets are available (except maybe
-  // a DTMF event, flagged by setting |play_dtmf| true).
-  Operations NoPacket(bool play_dtmf);
-
-  // Returns the operation to do given that the expected packet is available.
-  Operations ExpectedPacketAvailable(Modes prev_mode, bool play_dtmf);
-
-  // Returns the operation to do given that the expected packet is not
-  // available, but a packet further into the future is at hand.
-  Operations FuturePacketAvailable(const SyncBuffer& sync_buffer,
-                                   const Expand& expand,
-                                   int decoder_frame_length, Modes prev_mode,
-                                   uint32_t target_timestamp,
-                                   uint32_t available_timestamp,
-                                   bool play_dtmf);
-
   // Checks if enough time has elapsed since the last successful timescale
   // operation was done (i.e., accelerate or preemptive expand).
   bool TimescaleAllowed() const { return timescale_hold_off_ == 0; }
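
Note: ExpandDecision() used to receive a bool that the caller computed as
*operation == kExpand; it now receives the Operations value itself, so an
overriding DecisionLogic subclass can react to the full decision, not just
expand-or-not. The protected, virtual packet-availability methods in
DecisionLogicNormal serve the same purpose. A minimal caller under the new
contract (OnDecision is a hypothetical helper, not part of this CL):

    // The counter policy stays inside DecisionLogic; callers forward the
    // operation unmodified.
    void OnDecision(webrtc::DecisionLogic* logic, webrtc::Operations op) {
      logic->ExpandDecision(op);  // Streak grows only when op == kExpand.
    }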
diff --git a/webrtc/modules/audio_coding/neteq4/expand.cc b/webrtc/modules/audio_coding/neteq4/expand.cc
index cba99243d..9b08de15a 100644
--- a/webrtc/modules/audio_coding/neteq4/expand.cc
+++ b/webrtc/modules/audio_coding/neteq4/expand.cc
@@ -56,20 +56,9 @@ int Expand::Process(AudioMultiVector* output) {
     // This is not the first expansion, parameters are already estimated.
     // Extract a noise segment.
     int16_t rand_length = max_lag_;
-    // TODO(hlundin): This if-statement should not be needed. Should be just
-    // as good to generate all of the vector in one call in either case.
-    if (rand_length <= RandomVector::kRandomTableSize) {
-      random_vector_->IncreaseSeedIncrement(2);
-      random_vector_->Generate(rand_length, random_vector);
-    } else {
-      // This only applies to SWB where length could be larger than 256.
-      assert(rand_length <= kMaxSampleRate / 8000 * 120 + 30);
-      random_vector_->IncreaseSeedIncrement(2);
-      random_vector_->Generate(RandomVector::kRandomTableSize, random_vector);
-      random_vector_->IncreaseSeedIncrement(2);
-      random_vector_->Generate(rand_length - RandomVector::kRandomTableSize,
-                               &random_vector[RandomVector::kRandomTableSize]);
-    }
+    // This only applies to SWB where length could be larger than 256.
+    assert(rand_length <= kMaxSampleRate / 8000 * 120 + 30);
+    GenerateRandomVector(2, rand_length, random_vector);
   }
 
@@ -262,82 +251,12 @@ int Expand::Process(AudioMultiVector* output) {
     }
 
     // Background noise part.
-    // TODO(hlundin): Move to separate method? In BackgroundNoise class?
-    if (background_noise_->initialized()) {
-      // Use background noise parameters.
-      memcpy(noise_vector - kNoiseLpcOrder,
-             background_noise_->FilterState(channel_ix),
-             sizeof(int16_t) * kNoiseLpcOrder);
-
-      if (background_noise_->ScaleShift(channel_ix) > 1) {
-        add_constant = 1 << (background_noise_->ScaleShift(channel_ix) - 1);
-      } else {
-        add_constant = 0;
-      }
-
-      // Scale random vector to correct energy level.
-      WebRtcSpl_AffineTransformVector(
-          scaled_random_vector, random_vector,
-          background_noise_->Scale(channel_ix), add_constant,
-          background_noise_->ScaleShift(channel_ix),
-          static_cast<int>(current_lag));
-
-      WebRtcSpl_FilterARFastQ12(scaled_random_vector, noise_vector,
-                                background_noise_->Filter(channel_ix),
-                                kNoiseLpcOrder + 1,
-                                static_cast<int>(current_lag));
-
-      background_noise_->SetFilterState(
-          channel_ix,
-          &(noise_vector[current_lag - kNoiseLpcOrder]),
-          kNoiseLpcOrder);
-
-      // Unmute the background noise.
-      int16_t bgn_mute_factor = background_noise_->MuteFactor(channel_ix);
-      NetEqBackgroundNoiseMode bgn_mode = background_noise_->mode();
-      if (bgn_mode == kBgnFade &&
-          consecutive_expands_ >= kMaxConsecutiveExpands &&
-          bgn_mute_factor > 0) {
-        // Fade BGN to zero.
-        // Calculate muting slope, approximately -2^18 / fs_hz.
-        int16_t mute_slope;
-        if (fs_hz_ == 8000) {
-          mute_slope = -32;
-        } else if (fs_hz_ == 16000) {
-          mute_slope = -16;
-        } else if (fs_hz_ == 32000) {
-          mute_slope = -8;
-        } else {
-          mute_slope = -5;
-        }
-        // Use UnmuteSignal function with negative slope.
-        // |bgn_mute_factor| is in Q14. |mute_slope| is in Q20.
-        DspHelper::UnmuteSignal(noise_vector, current_lag, &bgn_mute_factor,
-                                mute_slope, noise_vector);
-      } else if (bgn_mute_factor < 16384) {
-        // If mode is kBgnOff, or if kBgnFade has started fading,
-        // Use regular |mute_slope|.
-        if (!stop_muting_ && bgn_mode != kBgnOff &&
-            !(bgn_mode == kBgnFade &&
-              consecutive_expands_ >= kMaxConsecutiveExpands)) {
-          DspHelper::UnmuteSignal(noise_vector, static_cast<int>(current_lag),
-                                  &bgn_mute_factor, parameters.mute_slope,
-                                  noise_vector);
-        } else {
-          // kBgnOn and stop muting, or
-          // kBgnOff (mute factor is always 0), or
-          // kBgnFade has reached 0.
-          WebRtcSpl_AffineTransformVector(noise_vector, noise_vector,
-                                          bgn_mute_factor, 8192, 14,
-                                          static_cast<int>(current_lag));
-        }
-      }
-      // Update mute_factor in BackgroundNoise class.
-      background_noise_->SetMuteFactor(channel_ix, bgn_mute_factor);
-    } else {
-      // BGN parameters have not been initialized; use zero noise.
-      memset(noise_vector, 0, sizeof(int16_t) * current_lag);
-    }
+    GenerateBackgroundNoise(random_vector,
+                            channel_ix,
+                            channel_parameters_[channel_ix].mute_slope,
+                            TooManyExpands(),
+                            current_lag,
+                            unvoiced_array_memory);
 
     // Add background noise to the combined voiced-unvoiced signal.
     for (size_t i = 0; i < current_lag; i++) {
@@ -353,11 +272,8 @@ int Expand::Process(AudioMultiVector* output) {
   }
 
   // Increase call number and cap it.
-  ++consecutive_expands_;
-  if (consecutive_expands_ > kMaxConsecutiveExpands) {
-    consecutive_expands_ = kMaxConsecutiveExpands;
-  }
-
+  consecutive_expands_ = consecutive_expands_ >= kMaxConsecutiveExpands ?
+      kMaxConsecutiveExpands : consecutive_expands_ + 1;
   return 0;
 }
 
@@ -373,6 +289,24 @@ void Expand::SetParametersForMergeAfterExpand() {
   stop_muting_ = true;
 }
 
+void Expand::InitializeForAnExpandPeriod() {
+  lag_index_direction_ = 1;
+  current_lag_index_ = -1;
+  stop_muting_ = false;
+  random_vector_->set_seed_increment(1);
+  consecutive_expands_ = 0;
+  for (size_t ix = 0; ix < num_channels_; ++ix) {
+    channel_parameters_[ix].current_voice_mix_factor = 16384;  // 1.0 in Q14.
+    channel_parameters_[ix].mute_factor = 16384;  // 1.0 in Q14.
+    // Start with 0 gain for background noise.
+    background_noise_->SetMuteFactor(ix, 0);
+  }
+}
+
+bool Expand::TooManyExpands() {
+  return consecutive_expands_ >= kMaxConsecutiveExpands;
+}
+
 void Expand::AnalyzeSignal(int16_t* random_vector) {
   int32_t auto_correlation[kUnvoicedLpcOrder + 1];
   int16_t reflection_coeff[kUnvoicedLpcOrder];
@@ -400,18 +334,8 @@ void Expand::AnalyzeSignal(int16_t* random_vector) {
   const int16_t* audio_history =
       &(*sync_buffer_)[0][sync_buffer_->Size() - signal_length];
 
-  // Initialize some member variables.
-  lag_index_direction_ = 1;
-  current_lag_index_ = -1;
-  stop_muting_ = false;
-  random_vector_->set_seed_increment(1);
-  consecutive_expands_ = 0;
-  for (size_t ix = 0; ix < num_channels_; ++ix) {
-    channel_parameters_[ix].current_voice_mix_factor = 16384;  // 1.0 in Q14.
-    channel_parameters_[ix].mute_factor = 16384;  // 1.0 in Q14.
-    // Start with 0 gain for background noise.
-    background_noise_->SetMuteFactor(ix, 0);
-  }
+  // Initialize.
+  InitializeForAnExpandPeriod();
 
   // Calculate correlation in downsampled domain (4 kHz sample rate).
   int16_t correlation_scale;
@@ -873,5 +797,108 @@ Expand* ExpandFactory::Create(BackgroundNoise* background_noise,
                               num_channels);
 }
 
+// TODO(turajs): This can be moved to BackgroundNoise class.
+void Expand::GenerateBackgroundNoise(int16_t* random_vector,
+                                     size_t channel,
+                                     int16_t mute_slope,
+                                     bool too_many_expands,
+                                     size_t num_noise_samples,
+                                     int16_t* buffer) {
+  static const int kNoiseLpcOrder = BackgroundNoise::kMaxLpcOrder;
+  int16_t scaled_random_vector[kMaxSampleRate / 8000 * 125];
+  assert(kMaxSampleRate / 8000 * 125 >= (int)num_noise_samples);
+  int16_t* noise_samples = &buffer[kNoiseLpcOrder];
+  if (background_noise_->initialized()) {
+    // Use background noise parameters.
+    memcpy(noise_samples - kNoiseLpcOrder,
+           background_noise_->FilterState(channel),
+           sizeof(int16_t) * kNoiseLpcOrder);
+
+    int dc_offset = 0;
+    if (background_noise_->ScaleShift(channel) > 1) {
+      dc_offset = 1 << (background_noise_->ScaleShift(channel) - 1);
+    }
+
+    // Scale random vector to correct energy level.
+    WebRtcSpl_AffineTransformVector(
+        scaled_random_vector, random_vector,
+        background_noise_->Scale(channel), dc_offset,
+        background_noise_->ScaleShift(channel),
+        static_cast<int>(num_noise_samples));
+
+    WebRtcSpl_FilterARFastQ12(scaled_random_vector, noise_samples,
+                              background_noise_->Filter(channel),
+                              kNoiseLpcOrder + 1,
+                              static_cast<int>(num_noise_samples));
+
+    background_noise_->SetFilterState(
+        channel,
+        &(noise_samples[num_noise_samples - kNoiseLpcOrder]),
+        kNoiseLpcOrder);
+
+    // Unmute the background noise.
+    int16_t bgn_mute_factor = background_noise_->MuteFactor(channel);
+    NetEqBackgroundNoiseMode bgn_mode = background_noise_->mode();
+    if (bgn_mode == kBgnFade && too_many_expands && bgn_mute_factor > 0) {
+      // Fade BGN to zero.
+      // Calculate muting slope, approximately -2^18 / fs_hz.
+      int16_t mute_slope;
+      if (fs_hz_ == 8000) {
+        mute_slope = -32;
+      } else if (fs_hz_ == 16000) {
+        mute_slope = -16;
+      } else if (fs_hz_ == 32000) {
+        mute_slope = -8;
+      } else {
+        mute_slope = -5;
+      }
+      // Use UnmuteSignal function with negative slope.
+      // |bgn_mute_factor| is in Q14. |mute_slope| is in Q20.
+      DspHelper::UnmuteSignal(noise_samples,
+                              num_noise_samples,
+                              &bgn_mute_factor,
+                              mute_slope,
+                              noise_samples);
+    } else if (bgn_mute_factor < 16384) {
+      // If mode is kBgnOff, or if kBgnFade has started fading,
+      // use regular |mute_slope|.
+      if (!stop_muting_ && bgn_mode != kBgnOff &&
+          !(bgn_mode == kBgnFade && too_many_expands)) {
+        DspHelper::UnmuteSignal(noise_samples,
+                                static_cast<int>(num_noise_samples),
+                                &bgn_mute_factor,
+                                mute_slope,
+                                noise_samples);
+      } else {
+        // kBgnOn and stop muting, or
+        // kBgnOff (mute factor is always 0), or
+        // kBgnFade has reached 0.
+        WebRtcSpl_AffineTransformVector(noise_samples, noise_samples,
+                                        bgn_mute_factor, 8192, 14,
+                                        static_cast<int>(num_noise_samples));
+      }
+    }
+    // Update mute_factor in BackgroundNoise class.
+    background_noise_->SetMuteFactor(channel, bgn_mute_factor);
+  } else {
+    // BGN parameters have not been initialized; use zero noise.
+    memset(noise_samples, 0, sizeof(int16_t) * num_noise_samples);
+  }
+}
+
+void Expand::GenerateRandomVector(int seed_increment,
+                                  size_t length,
+                                  int16_t* random_vector) {
+  // TODO(turajs): According to hlundin, the loop should not be needed. It
+  // should be just as good to generate all of the vector in one call.
+  size_t samples_generated = 0;
+  const size_t kMaxRandSamples = RandomVector::kRandomTableSize;
+  while (samples_generated < length) {
+    size_t rand_length = std::min(length - samples_generated, kMaxRandSamples);
+    random_vector_->IncreaseSeedIncrement(seed_increment);
+    random_vector_->Generate(rand_length, &random_vector[samples_generated]);
+    samples_generated += rand_length;
+  }
+}
+
 }  // namespace webrtc
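
Note: GenerateRandomVector() collapses the old two-branch generation into one
loop that emits at most RandomVector::kRandomTableSize samples per call,
bumping the seed increment before each chunk — the behavior of the old
else-branch, without the hard-coded split. A stand-alone illustration of the
chunking arithmetic (the table size of 256 and the 370-sample request are
assumptions for the example):

    #include <algorithm>
    #include <cstdio>

    int main() {
      const size_t kTableSize = 256;  // Stands in for kRandomTableSize.
      const size_t kLength = 370;     // E.g. a super-wideband max_lag_.
      size_t generated = 0;
      while (generated < kLength) {
        // Same bound the new loop applies via std::min().
        size_t chunk = std::min(kLength - generated, kTableSize);
        printf("chunk of %u samples at offset %u\n",
               static_cast<unsigned>(chunk), static_cast<unsigned>(generated));
        generated += chunk;
      }
      return 0;  // Prints 256 @ 0, then 114 @ 256.
    }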
diff --git a/webrtc/modules/audio_coding/neteq4/expand.h b/webrtc/modules/audio_coding/neteq4/expand.h
index 4de8d7c55..aec1cd9b6 100644
--- a/webrtc/modules/audio_coding/neteq4/expand.h
+++ b/webrtc/modules/audio_coding/neteq4/expand.h
@@ -36,12 +36,13 @@ class Expand {
          RandomVector* random_vector,
          int fs,
          size_t num_channels)
-      : background_noise_(background_noise),
+      : random_vector_(random_vector),
         sync_buffer_(sync_buffer),
-        random_vector_(random_vector),
         first_expand_(true),
         fs_hz_(fs),
         num_channels_(num_channels),
+        consecutive_expands_(0),
+        background_noise_(background_noise),
         overlap_length_(5 * fs / 8000),
         lag_index_direction_(0),
         current_lag_index_(0),
@@ -57,19 +58,19 @@ class Expand {
   virtual ~Expand() {}
 
   // Resets the object.
-  void Reset();
+  virtual void Reset();
 
   // The main method to produce concealment data. The data is appended to the
   // end of |output|.
-  int Process(AudioMultiVector* output);
+  virtual int Process(AudioMultiVector* output);
 
   // Prepare the object to do extra expansion during normal operation following
   // a period of expands.
-  void SetParametersForNormalAfterExpand();
+  virtual void SetParametersForNormalAfterExpand();
 
   // Prepare the object to do extra expansion during merge operation following
   // a period of expands.
-  void SetParametersForMergeAfterExpand();
+  virtual void SetParametersForMergeAfterExpand();
 
   // Sets the mute factor for |channel| to |value|.
   void SetMuteFactor(int16_t value, size_t channel) {
@@ -84,9 +85,38 @@ class Expand {
   }
 
   // Accessors and mutators.
-  size_t overlap_length() const { return overlap_length_; }
+  virtual size_t overlap_length() const { return overlap_length_; }
   int16_t max_lag() const { return max_lag_; }
 
+ protected:
+  static const int kMaxConsecutiveExpands = 200;
+
+  void GenerateRandomVector(int seed_increment,
+                            size_t length,
+                            int16_t* random_vector);
+
+  void GenerateBackgroundNoise(int16_t* random_vector,
+                               size_t channel,
+                               int16_t mute_slope,
+                               bool too_many_expands,
+                               size_t num_noise_samples,
+                               int16_t* buffer);
+
+  // Initializes member variables at the beginning of an expand period.
+  void InitializeForAnExpandPeriod();
+
+  bool TooManyExpands();
+
+  // Analyzes the signal history in |sync_buffer_|, and sets up all parameters
+  // necessary to produce concealment data.
+  void AnalyzeSignal(int16_t* random_vector);
+
+  RandomVector* random_vector_;
+  SyncBuffer* sync_buffer_;
+  bool first_expand_;
+  const int fs_hz_;
+  const size_t num_channels_;
+  int consecutive_expands_;
+
  private:
   static const int kUnvoicedLpcOrder = 6;
   static const int kNumCorrelationCandidates = 3;
@@ -94,7 +124,6 @@ class Expand {
   static const int kLpcAnalysisLength = 160;
   static const int kMaxSampleRate = 48000;
   static const int kNumLags = 3;
-  static const int kMaxConsecutiveExpands = 200;
 
   struct ChannelParameters {
     // Constructor.
@@ -122,10 +151,6 @@ class Expand {
     int16_t mute_slope; /* Q20 */
   };
 
-  // Analyze the signal history in |sync_buffer_|, and set up all parameters
-  // necessary to produce concealment data.
-  void AnalyzeSignal(int16_t* random_vector);
-
   // Calculate the auto-correlation of |input|, with length |input_length|
   // samples. The correlation is calculated from a downsampled version of
   // |input|, and is written to |output|. The scale factor is written to
@@ -136,13 +161,7 @@ class Expand {
   void UpdateLagIndex();
 
   BackgroundNoise* background_noise_;
-  SyncBuffer* sync_buffer_;
-  RandomVector* random_vector_;
-  bool first_expand_;
-  const int fs_hz_;
-  const size_t num_channels_;
   const size_t overlap_length_;
-  int consecutive_expands_;
   int16_t max_lag_;
   size_t expand_lags_[kNumLags];
   int lag_index_direction_;
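
Note: expand.h now exposes a protected surface (the generation helpers, the
expand-period bookkeeping, and the core members) and virtualizes the public
entry points, so Expand can be specialized without touching NetEqImpl. A
sketch of a derived class under those assumptions (ExpandWithLogging is a
hypothetical name; no subclass is added in this diff):

    // Constructor signature taken from expand.h above.
    class ExpandWithLogging : public webrtc::Expand {
     public:
      ExpandWithLogging(webrtc::BackgroundNoise* bgn,
                        webrtc::SyncBuffer* sync_buffer,
                        webrtc::RandomVector* random_vector,
                        int fs,
                        size_t num_channels)
          : Expand(bgn, sync_buffer, random_vector, fs, num_channels) {}

      virtual int Process(webrtc::AudioMultiVector* output) {
        if (TooManyExpands()) {
          // A subclass can now observe the fade-to-background-noise state.
        }
        return Expand::Process(output);
      }
    };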
diff --git a/webrtc/modules/audio_coding/neteq4/interface/neteq.h b/webrtc/modules/audio_coding/neteq4/interface/neteq.h
index 466882a5f..c0f7fd69b 100644
--- a/webrtc/modules/audio_coding/neteq4/interface/neteq.h
+++ b/webrtc/modules/audio_coding/neteq4/interface/neteq.h
@@ -108,7 +108,8 @@ class NetEq {
   // Creates a new NetEq object, starting at the sample rate |sample_rate_hz|.
   // (Note that it will still change the sample rate depending on what payloads
   // are being inserted; |sample_rate_hz| is just for startup configuration.)
-  static NetEq* Create(int sample_rate_hz);
+  static NetEq* Create(int sample_rate_hz,
+                       bool enable_audio_classifier = false);
 
   virtual ~NetEq() {}
 
diff --git a/webrtc/modules/audio_coding/neteq4/merge.cc b/webrtc/modules/audio_coding/neteq4/merge.cc
index 4b5601657..90068bb4a 100644
--- a/webrtc/modules/audio_coding/neteq4/merge.cc
+++ b/webrtc/modules/audio_coding/neteq4/merge.cc
@@ -20,6 +20,7 @@
 #include "webrtc/modules/audio_coding/neteq4/dsp_helper.h"
 #include "webrtc/modules/audio_coding/neteq4/expand.h"
 #include "webrtc/modules/audio_coding/neteq4/sync_buffer.h"
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
 
 namespace webrtc {
 
@@ -307,9 +308,11 @@ int16_t Merge::CorrelateAndPeakSearch(int16_t expanded_max, int16_t input_max,
                         stop_position_downsamp, correlation_shift, 1);
 
   // Normalize correlation to 14 bits and copy to a 16-bit array.
-  static const int kPadLength = 4;
-  int16_t correlation16[kPadLength + kMaxCorrelationLength + kPadLength] = {0};
-  int16_t* correlation_ptr = &correlation16[kPadLength];
+  const int pad_length = static_cast<int>(expand_->overlap_length() - 1);
+  const int correlation_buffer_size = 2 * pad_length + kMaxCorrelationLength;
+  scoped_ptr<int16_t[]> correlation16(new int16_t[correlation_buffer_size]);
+  memset(correlation16.get(), 0, correlation_buffer_size * sizeof(int16_t));
+  int16_t* correlation_ptr = &correlation16[pad_length];
   int32_t max_correlation = WebRtcSpl_MaxAbsValueW32(correlation,
                                                      stop_position_downsamp);
   int16_t norm_shift = std::max(0, 17 - WebRtcSpl_NormW32(max_correlation));
@@ -332,7 +335,7 @@ int16_t Merge::CorrelateAndPeakSearch(int16_t expanded_max, int16_t input_max,
   // start index |start_index_downsamp| and the effective array length.
   int modified_stop_pos =
       std::min(stop_position_downsamp,
-               kMaxCorrelationLength + kPadLength - start_index_downsamp);
+               kMaxCorrelationLength + pad_length - start_index_downsamp);
   int best_correlation_index;
   int16_t best_correlation;
   static const int kNumCorrelationCandidates = 1;
@@ -355,4 +358,9 @@ int16_t Merge::CorrelateAndPeakSearch(int16_t expanded_max, int16_t input_max,
   return best_correlation_index;
 }
 
+int Merge::RequiredFutureSamples() {
+  return static_cast<int>(fs_hz_ / 100 * num_channels_);  // 10 ms.
+}
+
+
 }  // namespace webrtc
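
Note: the merge.cc change replaces the fixed kPadLength = 4 with
expand_->overlap_length() - 1 and heap-allocates the correlation buffer
accordingly. The old constant happens to equal 5 * 8000 / 8000 - 1, i.e. it
was only right at 8 kHz; the new rule scales with the rate.
RequiredFutureSamples() likewise quantifies merge's need for 10 ms of audio
across all channels. A quick check of the pad arithmetic:

    #include <cstdio>

    int main() {
      const int kRates[] = {8000, 16000, 32000, 48000};
      for (int i = 0; i < 4; ++i) {
        int overlap = 5 * kRates[i] / 8000;  // Expand::overlap_length().
        printf("fs=%d Hz: overlap=%d, pad=%d\n",
               kRates[i], overlap, overlap - 1);  // 8 kHz -> pad 4, as before.
      }
      return 0;
    }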
diff --git a/webrtc/modules/audio_coding/neteq4/merge.h b/webrtc/modules/audio_coding/neteq4/merge.h
index f1f64e6c5..213b48716 100644
--- a/webrtc/modules/audio_coding/neteq4/merge.h
+++ b/webrtc/modules/audio_coding/neteq4/merge.h
@@ -35,8 +35,8 @@ class Merge {
  public:
   Merge(int fs_hz, size_t num_channels, Expand* expand, SyncBuffer* sync_buffer)
       : fs_hz_(fs_hz),
-        fs_mult_(fs_hz_ / 8000),
         num_channels_(num_channels),
+        fs_mult_(fs_hz_ / 8000),
         timestamps_per_call_(fs_hz_ / 100),
         expand_(expand),
         sync_buffer_(sync_buffer),
@@ -44,6 +44,8 @@ class Merge {
     assert(num_channels_ > 0);
   }
 
+  virtual ~Merge() {}
+
   // The main method to produce the audio data. The decoded data is supplied in
   // |input|, having |input_length| samples in total for all channels
   // (interleaved). The result is written to |output|. The number of channels
@@ -51,9 +53,15 @@ class Merge {
   // de-interleaving |input|. The values in |external_mute_factor_array| (Q14)
   // will be used to scale the audio, and is updated in the process. The array
   // must have |num_channels_| elements.
-  int Process(int16_t* input, size_t input_length,
-              int16_t* external_mute_factor_array,
-              AudioMultiVector* output);
+  virtual int Process(int16_t* input, size_t input_length,
+                      int16_t* external_mute_factor_array,
+                      AudioMultiVector* output);
+
+  virtual int RequiredFutureSamples();
+
+ protected:
+  const int fs_hz_;
+  const size_t num_channels_;
 
  private:
   static const int kMaxSampleRate = 48000;
@@ -87,9 +95,7 @@ class Merge {
                           int start_position, int input_length,
                           int expand_period) const;
 
-  const int fs_hz_;
   const int fs_mult_;  // fs_hz_ / 8000.
-  const size_t num_channels_;
   const int timestamps_per_call_;
   Expand* expand_;
   SyncBuffer* sync_buffer_;
diff --git a/webrtc/modules/audio_coding/neteq4/neteq.cc b/webrtc/modules/audio_coding/neteq4/neteq.cc
index a64f01b25..9f36a9655 100644
--- a/webrtc/modules/audio_coding/neteq4/neteq.cc
+++ b/webrtc/modules/audio_coding/neteq4/neteq.cc
@@ -28,7 +28,7 @@ namespace webrtc {
 
 // Creates all classes needed and inject them into a new NetEqImpl object.
 // Return the new object.
-NetEq* NetEq::Create(int sample_rate_hz) {
+NetEq* NetEq::Create(int sample_rate_hz, bool enable_audio_classifier) {
   BufferLevelFilter* buffer_level_filter = new BufferLevelFilter;
   DecoderDatabase* decoder_database = new DecoderDatabase;
   DelayPeakDetector* delay_peak_detector = new DelayPeakDetector;
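
Note: because the new Create() parameter is defaulted in the interface,
existing callers compile unchanged; this hunk only threads the flag through,
and nothing shown in this diff consumes enable_audio_classifier yet. Usage
under that assumption:

    #include "webrtc/modules/audio_coding/neteq4/interface/neteq.h"

    void CreateNetEqInstances() {
      webrtc::NetEq* plain = webrtc::NetEq::Create(16000);  // As before.
      webrtc::NetEq* with_classifier =
          webrtc::NetEq::Create(16000, true);  // Opt in to the classifier.
      delete plain;
      delete with_classifier;
    }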
diff --git a/webrtc/modules/audio_coding/neteq4/neteq_impl.cc b/webrtc/modules/audio_coding/neteq4/neteq_impl.cc
index 8ab6205ec..e407ee83e 100644
--- a/webrtc/modules/audio_coding/neteq4/neteq_impl.cc
+++ b/webrtc/modules/audio_coding/neteq4/neteq_impl.cc
@@ -61,7 +61,8 @@ NetEqImpl::NetEqImpl(int fs,
                      TimestampScaler* timestamp_scaler,
                      AccelerateFactory* accelerate_factory,
                      ExpandFactory* expand_factory,
-                     PreemptiveExpandFactory* preemptive_expand_factory)
+                     PreemptiveExpandFactory* preemptive_expand_factory,
+                     bool create_components)
     : buffer_level_filter_(buffer_level_filter),
       decoder_database_(decoder_database),
       delay_manager_(delay_manager),
@@ -103,13 +104,9 @@ NetEqImpl::NetEqImpl(int fs,
   output_size_samples_ = kOutputSizeMs * 8 * fs_mult_;
   decoder_frame_length_ = 3 * output_size_samples_;
   WebRtcSpl_Init();
-  decision_logic_.reset(DecisionLogic::Create(fs_hz_, output_size_samples_,
-                                              kPlayoutOn,
-                                              decoder_database_.get(),
-                                              *packet_buffer_.get(),
-                                              delay_manager_.get(),
-                                              buffer_level_filter_.get()));
-  SetSampleRateAndChannels(fs, 1);  // Default is 1 channel.
+  if (create_components) {
+    SetSampleRateAndChannels(fs, 1);  // Default is 1 channel.
+  }
 }
 
 NetEqImpl::~NetEqImpl() {
@@ -284,12 +281,7 @@ void NetEqImpl::SetPlayoutMode(NetEqPlayoutMode mode) {
   CriticalSectionScoped lock(crit_sect_.get());
   if (!decision_logic_.get() || mode != decision_logic_->playout_mode()) {
     // The reset() method calls delete for the old object.
-    decision_logic_.reset(DecisionLogic::Create(fs_hz_, output_size_samples_,
-                                                mode,
-                                                decoder_database_.get(),
-                                                *packet_buffer_.get(),
-                                                delay_manager_.get(),
-                                                buffer_level_filter_.get()));
+    CreateDecisionLogic(mode);
   }
 }
 
@@ -948,7 +940,7 @@ int NetEqImpl::GetDecision(Operations* operation,
     return 0;
   }
 
-  decision_logic_->ExpandDecision(*operation == kExpand);
+  decision_logic_->ExpandDecision(*operation);
 
   // Check conditions for reset.
   if (new_codec_ || *operation == kUndefined) {
@@ -1067,6 +1059,11 @@ int NetEqImpl::GetDecision(Operations* operation,
       // Move on with the preemptive expand decision.
       break;
    }
+    case kMerge: {
+      required_samples =
+          std::max(merge_->RequiredFutureSamples(), required_samples);
+      break;
+    }
    default: {
      // Do nothing.
    }
@@ -1834,6 +1831,14 @@ int NetEqImpl::ExtractPackets(int required_samples, PacketList* packet_list) {
   return extracted_samples;
 }
 
+void NetEqImpl::UpdatePlcComponents(int fs_hz, size_t channels) {
+  // Delete objects and create new ones.
+  expand_.reset(expand_factory_->Create(background_noise_.get(),
+                                        sync_buffer_.get(), &random_vector_,
+                                        fs_hz, channels));
+  merge_.reset(new Merge(fs_hz, channels, expand_.get(), sync_buffer_.get()));
+}
+
 void NetEqImpl::SetSampleRateAndChannels(int fs_hz, size_t channels) {
   LOG_API2(fs_hz, channels);
   // TODO(hlundin): Change to an enumerator and skip assert.
@@ -1881,21 +1886,20 @@ void NetEqImpl::SetSampleRateAndChannels(int fs_hz, size_t channels) {
   // Reset random vector.
   random_vector_.Reset();
 
-  // Delete Expand object and create a new one.
-  expand_.reset(expand_factory_->Create(background_noise_.get(),
-                                        sync_buffer_.get(), &random_vector_,
-                                        fs_hz, channels));
+  UpdatePlcComponents(fs_hz, channels);
+
   // Move index so that we create a small set of future samples (all 0).
   sync_buffer_->set_next_index(sync_buffer_->next_index() -
-      expand_->overlap_length());
+                               expand_->overlap_length());
 
   normal_.reset(new Normal(fs_hz, decoder_database_.get(), *background_noise_,
                            expand_.get()));
-  merge_.reset(new Merge(fs_hz, channels, expand_.get(), sync_buffer_.get()));
   accelerate_.reset(
       accelerate_factory_->Create(fs_hz, channels, *background_noise_));
-  preemptive_expand_.reset(
-      preemptive_expand_factory_->Create(fs_hz, channels, *background_noise_));
+  preemptive_expand_.reset(preemptive_expand_factory_->Create(
+      fs_hz, channels,
+      *background_noise_,
+      static_cast<int>(expand_->overlap_length())));
 
   // Delete ComfortNoise object and create a new one.
   comfort_noise_.reset(new ComfortNoise(fs_hz, decoder_database_.get(),
@@ -1908,8 +1912,11 @@ void NetEqImpl::SetSampleRateAndChannels(int fs_hz, size_t channels) {
     decoded_buffer_.reset(new int16_t[decoded_buffer_length_]);
   }
 
-  // Communicate new sample rate and output size to DecisionLogic object.
-  assert(decision_logic_.get());
+  // Create DecisionLogic if it is not created yet, then communicate new sample
+  // rate and output size to DecisionLogic object.
+  if (!decision_logic_.get()) {
+    CreateDecisionLogic(kPlayoutOn);
+  }
   decision_logic_->SetSampleRate(fs_hz_, output_size_samples_);
 }
 
@@ -1930,4 +1937,12 @@ NetEqOutputType NetEqImpl::LastOutputType() {
   }
 }
 
+void NetEqImpl::CreateDecisionLogic(NetEqPlayoutMode mode) {
+  decision_logic_.reset(DecisionLogic::Create(fs_hz_, output_size_samples_,
+                                              mode,
+                                              decoder_database_.get(),
+                                              *packet_buffer_.get(),
+                                              delay_manager_.get(),
+                                              buffer_level_filter_.get()));
+}
 }  // namespace webrtc
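
Note: two things happen in neteq_impl.cc. First, component construction is
factored into UpdatePlcComponents()/CreateDecisionLogic() and gated behind
create_components (the constructor's new flag, defaulted to true), so a
subclass can defer building Expand/Merge until its own state is ready.
Second, GetDecision() gains a kMerge case that raises required_samples to
Merge::RequiredFutureSamples(). What that case computes, as a stand-alone
sketch (the 480-sample starting value is an assumption):

    #include <algorithm>
    #include <cstdio>

    int main() {
      const int fs_hz = 32000;
      const size_t num_channels = 2;
      int required_samples = 480;  // Whatever earlier logic asked for.
      // Merge::RequiredFutureSamples(): 10 ms worth, all channels interleaved.
      const int merge_needs = static_cast<int>(fs_hz / 100 * num_channels);
      required_samples = std::max(merge_needs, required_samples);
      printf("required_samples = %d\n", required_samples);  // 640.
      return 0;
    }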
diff --git a/webrtc/modules/audio_coding/neteq4/neteq_impl.h b/webrtc/modules/audio_coding/neteq4/neteq_impl.h
index dabf2d6cb..3d883a014 100644
--- a/webrtc/modules/audio_coding/neteq4/neteq_impl.h
+++ b/webrtc/modules/audio_coding/neteq4/neteq_impl.h
@@ -70,7 +70,8 @@ class NetEqImpl : public webrtc::NetEq {
             TimestampScaler* timestamp_scaler,
             AccelerateFactory* accelerate_factory,
             ExpandFactory* expand_factory,
-            PreemptiveExpandFactory* preemptive_expand_factory);
+            PreemptiveExpandFactory* preemptive_expand_factory,
+            bool create_components = true);
 
   virtual ~NetEqImpl();
 
@@ -203,7 +204,7 @@ class NetEqImpl : public webrtc::NetEq {
   // This accessor method is only intended for testing purposes.
   virtual const SyncBuffer* sync_buffer_for_test() const;
 
- private:
+ protected:
   static const int kOutputSizeMs = 10;
   static const int kMaxFrameSize = 2880;  // 60 ms @ 48 kHz.
   // TODO(hlundin): Provide a better value for kSyncBufferSize.
@@ -331,6 +332,14 @@ class NetEqImpl : public webrtc::NetEq {
   // GetAudio().
   NetEqOutputType LastOutputType() EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
 
+  // Updates Expand and Merge.
+  virtual void UpdatePlcComponents(int fs_hz, size_t channels)
+      EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
+
+  // Creates DecisionLogic object for the given mode.
+  void CreateDecisionLogic(NetEqPlayoutMode mode)
+      EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
+
   const scoped_ptr<BufferLevelFilter> buffer_level_filter_;
   const scoped_ptr<DecoderDatabase> decoder_database_;
   const scoped_ptr<DelayManager> delay_manager_;
@@ -388,6 +397,7 @@ class NetEqImpl : public webrtc::NetEq {
   int decoded_packet_sequence_number_ GUARDED_BY(crit_sect_);
   uint32_t decoded_packet_timestamp_ GUARDED_BY(crit_sect_);
 
+ private:
   DISALLOW_COPY_AND_ASSIGN(NetEqImpl);
 };
 
diff --git a/webrtc/modules/audio_coding/neteq4/neteq_tests.gypi b/webrtc/modules/audio_coding/neteq4/neteq_tests.gypi
index 9d0aa4233..dda2a1ce8 100644
--- a/webrtc/modules/audio_coding/neteq4/neteq_tests.gypi
+++ b/webrtc/modules/audio_coding/neteq4/neteq_tests.gypi
@@ -49,6 +49,7 @@
         'CODEC_PCM16B_WB',
        'CODEC_ISAC_SWB',
        'CODEC_PCM16B_32KHZ',
+       'CODEC_PCM16B_48KHZ',
        'CODEC_CNGCODEC8',
        'CODEC_CNGCODEC16',
        'CODEC_CNGCODEC32',
diff --git a/webrtc/modules/audio_coding/neteq4/neteq_unittest.cc b/webrtc/modules/audio_coding/neteq4/neteq_unittest.cc
index 365e233e6..1e8c1288a 100644
--- a/webrtc/modules/audio_coding/neteq4/neteq_unittest.cc
+++ b/webrtc/modules/audio_coding/neteq4/neteq_unittest.cc
@@ -18,6 +18,7 @@
 #include <stdlib.h>
 #include <string.h>  // memset
 
+#include <algorithm>
 #include <set>
 #include <string>
 #include <vector>
@@ -232,6 +233,7 @@ class NetEqDecodingTest : public ::testing::Test {
   unsigned int sim_clock_;
   int16_t out_data_[kMaxBlockSize];
   int output_sample_rate_;
+  int algorithmic_delay_ms_;
 };
 
 // Allocating the static const so that it can be passed by reference.
@@ -246,12 +248,16 @@ NetEqDecodingTest::NetEqDecodingTest()
     : neteq_(NULL),
       rtp_fp_(NULL),
       sim_clock_(0),
-      output_sample_rate_(kInitSampleRateHz) {
+      output_sample_rate_(kInitSampleRateHz),
+      algorithmic_delay_ms_(0) {
   memset(out_data_, 0, sizeof(out_data_));
 }
 
 void NetEqDecodingTest::SetUp() {
   neteq_ = NetEq::Create(kInitSampleRateHz);
+  NetEqNetworkStatistics stat;
+  ASSERT_EQ(0, neteq_->NetworkStatistics(&stat));
+  algorithmic_delay_ms_ = stat.current_buffer_size_ms;
   ASSERT_TRUE(neteq_);
   LoadDecoders();
 }
@@ -483,8 +489,8 @@ void NetEqDecodingTest::CheckBgnOff(int sampling_rate_hz,
   ASSERT_EQ(expected_samples_per_channel, samples_per_channel);
 
   // To be able to test the fading of background noise we need at lease to pull
-  // 610 frames.
-  const int kFadingThreshold = 610;
+  // 611 frames.
+  const int kFadingThreshold = 611;
 
   // Test several CNG-to-PLC packet for the expected behavior. The number 20 is
   // arbitrary, but sufficiently large to test enough number of frames.
@@ -1110,12 +1116,16 @@ TEST_F(NetEqDecodingTest, DISABLED_ON_ANDROID(SyncPacketInsert)) {
 // First insert several noise like packets, then sync-packets. Decoding all
 // packets should not produce error, statistics should not show any packet loss
 // and sync-packets should decode to zero.
+// TODO(turajs): We will have a better test if we have a reference NetEq; when
+// sync packets are inserted in the "test" NetEq, we insert all-zero payloads
+// in the reference NetEq and compare the output of the two.
 TEST_F(NetEqDecodingTest, DISABLED_ON_ANDROID(SyncPacketDecode)) {
   WebRtcRTPHeader rtp_info;
   PopulateRtpInfo(0, 0, &rtp_info);
   const int kPayloadBytes = kBlockSize16kHz * sizeof(int16_t);
   uint8_t payload[kPayloadBytes];
   int16_t decoded[kBlockSize16kHz];
+  int algorithmic_frame_delay = algorithmic_delay_ms_ / 10 + 1;
   for (int n = 0; n < kPayloadBytes; ++n) {
     payload[n] = (rand() & 0xF0) + 1;  // Non-zero random sequence.
   }
@@ -1125,7 +1135,6 @@ TEST_F(NetEqDecodingTest, DISABLED_ON_ANDROID(SyncPacketDecode)) {
   int num_channels;
   int samples_per_channel;
   uint32_t receive_timestamp = 0;
-  int delay_samples = 0;
   for (int n = 0; n < 100; ++n) {
     ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, kPayloadBytes,
                                       receive_timestamp));
@@ -1135,16 +1144,15 @@ TEST_F(NetEqDecodingTest, DISABLED_ON_ANDROID(SyncPacketDecode)) {
     ASSERT_EQ(kBlockSize16kHz, samples_per_channel);
     ASSERT_EQ(1, num_channels);
 
-    // Even if there is RTP packet in NetEq's buffer, the first frame pulled
-    // from NetEq starts with few zero samples. Here we measure this delay.
-    if (n == 0) {
-      while (decoded[delay_samples] == 0) delay_samples++;
-    }
     rtp_info.header.sequenceNumber++;
     rtp_info.header.timestamp += kBlockSize16kHz;
     receive_timestamp += kBlockSize16kHz;
   }
   const int kNumSyncPackets = 10;
+
+  // Make sure a sufficient number of sync packets is inserted so that we can
+  // conduct the test.
+  ASSERT_GT(kNumSyncPackets, algorithmic_frame_delay);
   // Insert sync-packets, the decoded sequence should be all-zero.
   for (int n = 0; n < kNumSyncPackets; ++n) {
     ASSERT_EQ(0, neteq_->InsertSyncPacket(rtp_info, receive_timestamp));
@@ -1153,30 +1161,37 @@ TEST_F(NetEqDecodingTest, DISABLED_ON_ANDROID(SyncPacketDecode)) {
     ASSERT_EQ(0, neteq_->GetAudio(kBlockSize16kHz, decoded,
                                   &samples_per_channel, &num_channels,
                                   &output_type));
     ASSERT_EQ(kBlockSize16kHz, samples_per_channel);
     ASSERT_EQ(1, num_channels);
-    EXPECT_TRUE(IsAllZero(&decoded[delay_samples],
-                          samples_per_channel * num_channels - delay_samples));
-    delay_samples = 0;  // Delay only matters in the first frame.
+    if (n > algorithmic_frame_delay) {
+      EXPECT_TRUE(IsAllZero(decoded, samples_per_channel * num_channels));
+    }
     rtp_info.header.sequenceNumber++;
     rtp_info.header.timestamp += kBlockSize16kHz;
     receive_timestamp += kBlockSize16kHz;
   }
-  // We insert a regular packet, if sync packet are not correctly buffered then
+
+  // We insert regular packets; if sync packets are not correctly buffered then
   // network statistics would show some packet loss.
-  ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, kPayloadBytes,
-                                    receive_timestamp));
-  ASSERT_EQ(0, neteq_->GetAudio(kBlockSize16kHz, decoded,
-                                &samples_per_channel, &num_channels,
-                                &output_type));
-  // Make sure the last inserted packet is decoded and there are non-zero
-  // samples.
-  EXPECT_FALSE(IsAllZero(decoded, samples_per_channel * num_channels));
+  for (int n = 0; n <= algorithmic_frame_delay + 10; ++n) {
+    ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, kPayloadBytes,
+                                      receive_timestamp));
+    ASSERT_EQ(0, neteq_->GetAudio(kBlockSize16kHz, decoded,
+                                  &samples_per_channel, &num_channels,
+                                  &output_type));
+    if (n >= algorithmic_frame_delay + 1) {
+      // Expect that this frame contains samples from a regular RTP packet.
+      EXPECT_TRUE(IsAllNonZero(decoded, samples_per_channel * num_channels));
+    }
+    rtp_info.header.sequenceNumber++;
+    rtp_info.header.timestamp += kBlockSize16kHz;
+    receive_timestamp += kBlockSize16kHz;
+  }
   NetEqNetworkStatistics network_stats;
   ASSERT_EQ(0, neteq_->NetworkStatistics(&network_stats));
   // Expecting a "clean" network.
   EXPECT_EQ(0, network_stats.packet_loss_rate);
   EXPECT_EQ(0, network_stats.expand_rate);
   EXPECT_EQ(0, network_stats.accelerate_rate);
-  EXPECT_EQ(0, network_stats.preemptive_rate);
+  EXPECT_LE(network_stats.preemptive_rate, 150);
 }
 
 // Test if the size of the packet buffer reported correctly when containing
@@ -1199,7 +1214,8 @@ TEST_F(NetEqDecodingTest,
   int num_channels;
   int samples_per_channel;
   uint32_t receive_timestamp = 0;
-  for (int n = 0; n < 1; ++n) {
+  int algorithmic_frame_delay = algorithmic_delay_ms_ / 10 + 1;
+  for (int n = 0; n < algorithmic_frame_delay; ++n) {
     ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, kPayloadBytes,
                                       receive_timestamp));
     ASSERT_EQ(0, neteq_->GetAudio(kBlockSize16kHz, decoded,
@@ -1225,7 +1241,8 @@ TEST_F(NetEqDecodingTest,
   }
   NetEqNetworkStatistics network_stats;
   ASSERT_EQ(0, neteq_->NetworkStatistics(&network_stats));
-  EXPECT_EQ(kNumSyncPackets * 10, network_stats.current_buffer_size_ms);
+  EXPECT_EQ(kNumSyncPackets * 10 + algorithmic_delay_ms_,
+            network_stats.current_buffer_size_ms);
 
   // Rewind |rtp_info| to that of the first sync packet.
   memcpy(&rtp_info, &first_sync_packet_rtp_info, sizeof(rtp_info));
@@ -1298,7 +1315,8 @@ void NetEqDecodingTest::WrapTest(uint16_t start_seq_no,
     if (packets_inserted > 4) {
       // Expect preferred and actual buffer size to be no more than 2 frames.
       EXPECT_LE(network_stats.preferred_buffer_size_ms, kFrameSizeMs * 2);
-      EXPECT_LE(network_stats.current_buffer_size_ms, kFrameSizeMs * 2);
+      EXPECT_LE(network_stats.current_buffer_size_ms, kFrameSizeMs * 2 +
+                algorithmic_delay_ms_);
     }
     last_seq_no = seq_no;
     last_timestamp = timestamp;
@@ -1362,6 +1380,8 @@ void NetEqDecodingTest::DuplicateCng() {
   const int kSamples = kFrameSizeMs * kSampleRateKhz;
   const int kPayloadBytes = kSamples * 2;
 
+  const int algorithmic_delay_samples = std::max(
+      algorithmic_delay_ms_ * kSampleRateKhz, 5 * kSampleRateKhz / 8);
   // Insert three speech packet. Three are needed to get the frame length
   // correct.
   int out_len;
@@ -1398,7 +1418,7 @@ void NetEqDecodingTest::DuplicateCng() {
       kMaxBlockSize, out_data_, &out_len, &num_channels, &type));
   ASSERT_EQ(kBlockSize16kHz, out_len);
   EXPECT_EQ(kOutputCNG, type);
-  EXPECT_EQ(timestamp - 10, neteq_->PlayoutTimestamp());
+  EXPECT_EQ(timestamp - algorithmic_delay_samples, neteq_->PlayoutTimestamp());
 
   // Insert the same CNG packet again. Note that at this point it is old, since
   // we have already decoded the first copy of it.
@@ -1412,7 +1432,8 @@ void NetEqDecodingTest::DuplicateCng() {
       kMaxBlockSize, out_data_, &out_len, &num_channels, &type));
   ASSERT_EQ(kBlockSize16kHz, out_len);
   EXPECT_EQ(kOutputCNG, type);
-  EXPECT_EQ(timestamp - 10, neteq_->PlayoutTimestamp());
+  EXPECT_EQ(timestamp - algorithmic_delay_samples,
+            neteq_->PlayoutTimestamp());
 }
 
 // Insert speech again.
@@ -1427,7 +1448,8 @@ void NetEqDecodingTest::DuplicateCng() {
       kMaxBlockSize, out_data_, &out_len, &num_channels, &type));
   ASSERT_EQ(kBlockSize16kHz, out_len);
   EXPECT_EQ(kOutputNormal, type);
-  EXPECT_EQ(timestamp + kSamples - 10, neteq_->PlayoutTimestamp());
+  EXPECT_EQ(timestamp + kSamples - algorithmic_delay_samples,
+            neteq_->PlayoutTimestamp());
 }
 
 TEST_F(NetEqDecodingTest, DiscardDuplicateCng) { DuplicateCng(); }
diff --git a/webrtc/modules/audio_coding/neteq4/preemptive_expand.cc b/webrtc/modules/audio_coding/neteq4/preemptive_expand.cc
index c7ce31040..dace45eb9 100644
--- a/webrtc/modules/audio_coding/neteq4/preemptive_expand.cc
+++ b/webrtc/modules/audio_coding/neteq4/preemptive_expand.cc
@@ -101,8 +101,10 @@ PreemptiveExpand::ReturnCodes PreemptiveExpand::CheckCriteriaAndStretch(
 PreemptiveExpand* PreemptiveExpandFactory::Create(
     int sample_rate_hz,
     size_t num_channels,
-    const BackgroundNoise& background_noise) const {
-  return new PreemptiveExpand(sample_rate_hz, num_channels, background_noise);
+    const BackgroundNoise& background_noise,
+    int overlap_samples) const {
+  return new PreemptiveExpand(
+      sample_rate_hz, num_channels, background_noise, overlap_samples);
 }
 
 }  // namespace webrtc
diff --git a/webrtc/modules/audio_coding/neteq4/preemptive_expand.h b/webrtc/modules/audio_coding/neteq4/preemptive_expand.h
index 241425e81..983f64d54 100644
--- a/webrtc/modules/audio_coding/neteq4/preemptive_expand.h
+++ b/webrtc/modules/audio_coding/neteq4/preemptive_expand.h
@@ -29,11 +29,13 @@ class BackgroundNoise;
 // PreemptiveExpand are implemented.
 class PreemptiveExpand : public TimeStretch {
  public:
-  PreemptiveExpand(int sample_rate_hz, size_t num_channels,
-                   const BackgroundNoise& background_noise)
+  PreemptiveExpand(int sample_rate_hz,
+                   size_t num_channels,
+                   const BackgroundNoise& background_noise,
+                   int overlap_samples)
       : TimeStretch(sample_rate_hz, num_channels, background_noise),
         old_data_length_per_channel_(-1),
-        overlap_samples_(5 * sample_rate_hz / 8000) {
+        overlap_samples_(overlap_samples) {
   }
 
   virtual ~PreemptiveExpand() {}
@@ -77,7 +79,8 @@ struct PreemptiveExpandFactory {
   virtual PreemptiveExpand* Create(
       int sample_rate_hz,
       size_t num_channels,
-      const BackgroundNoise& background_noise) const;
+      const BackgroundNoise& background_noise,
+      int overlap_samples) const;
 };
 
 }  // namespace webrtc
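
Note: PreemptiveExpand no longer derives its overlap internally (the old
5 * sample_rate_hz / 8000); the caller injects it, and NetEqImpl above passes
Expand's overlap_length(), which follows the same formula. A usage sketch
mirroring the updated unit tests below (the background_noise.h include path
is an assumption):

    #include "webrtc/modules/audio_coding/neteq4/background_noise.h"
    #include "webrtc/modules/audio_coding/neteq4/preemptive_expand.h"

    void CreatePreemptiveExpand() {
      const int kSampleRate = 16000;
      const size_t kNumChannels = 1;
      const int kOverlapSamples = 5 * kSampleRate / 8000;  // 10 samples.
      webrtc::BackgroundNoise bgn(kNumChannels);
      webrtc::PreemptiveExpandFactory factory;
      webrtc::PreemptiveExpand* preemptive_expand =
          factory.Create(kSampleRate, kNumChannels, bgn, kOverlapSamples);
      delete preemptive_expand;
    }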
diff --git a/webrtc/modules/audio_coding/neteq4/sync_buffer.h b/webrtc/modules/audio_coding/neteq4/sync_buffer.h
index e1e5daf1b..0659f199d 100644
--- a/webrtc/modules/audio_coding/neteq4/sync_buffer.h
+++ b/webrtc/modules/audio_coding/neteq4/sync_buffer.h
@@ -78,7 +78,8 @@ class SyncBuffer : public AudioMultiVector {
   // created.
   void Flush();
 
-  const AudioVector& Channel(size_t n) { return *channels_[n]; }
+  const AudioVector& Channel(size_t n) const { return *channels_[n]; }
+  AudioVector& Channel(size_t n) { return *channels_[n]; }
 
   // Accessors and mutators.
   size_t next_index() const { return next_index_; }
diff --git a/webrtc/modules/audio_coding/neteq4/time_stretch_unittest.cc b/webrtc/modules/audio_coding/neteq4/time_stretch_unittest.cc
index 188c18b71..529038ac3 100644
--- a/webrtc/modules/audio_coding/neteq4/time_stretch_unittest.cc
+++ b/webrtc/modules/audio_coding/neteq4/time_stretch_unittest.cc
@@ -21,14 +21,17 @@ namespace webrtc {
 TEST(TimeStretch, CreateAndDestroy) {
   const int kSampleRate = 8000;
   const size_t kNumChannels = 1;
+  const int kOverlapSamples = 5 * kSampleRate / 8000;
   BackgroundNoise bgn(kNumChannels);
   Accelerate accelerate(kSampleRate, kNumChannels, bgn);
-  PreemptiveExpand preemptive_expand(kSampleRate, kNumChannels, bgn);
+  PreemptiveExpand preemptive_expand(
+      kSampleRate, kNumChannels, bgn, kOverlapSamples);
 }
 
 TEST(TimeStretch, CreateUsingFactory) {
   const int kSampleRate = 8000;
   const size_t kNumChannels = 1;
+  const int kOverlapSamples = 5 * kSampleRate / 8000;
   BackgroundNoise bgn(kNumChannels);
 
   AccelerateFactory accelerate_factory;
@@ -38,8 +41,8 @@ TEST(TimeStretch, CreateUsingFactory) {
   Accelerate* accelerate =
       accelerate_factory.Create(kSampleRate, kNumChannels, bgn);
   EXPECT_TRUE(accelerate != NULL);
   delete accelerate;
 
   PreemptiveExpandFactory preemptive_expand_factory;
-  PreemptiveExpand* preemptive_expand =
-      preemptive_expand_factory.Create(kSampleRate, kNumChannels, bgn);
+  PreemptiveExpand* preemptive_expand = preemptive_expand_factory.Create(
+      kSampleRate, kNumChannels, bgn, kOverlapSamples);
   EXPECT_TRUE(preemptive_expand != NULL);
   delete preemptive_expand;
 }