diff --git a/webrtc/modules/audio_coding/neteq4/background_noise.h b/webrtc/modules/audio_coding/neteq4/background_noise.h index 16848c57c..8d4237f36 100644 --- a/webrtc/modules/audio_coding/neteq4/background_noise.h +++ b/webrtc/modules/audio_coding/neteq4/background_noise.h @@ -14,6 +14,7 @@ #include // size_t #include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h" +#include "webrtc/modules/audio_coding/neteq4/interface/neteq.h" #include "webrtc/system_wrappers/interface/constructor_magic.h" #include "webrtc/system_wrappers/interface/scoped_ptr.h" #include "webrtc/typedefs.h" @@ -26,12 +27,6 @@ class PostDecodeVad; // This class handles estimation of background noise parameters. class BackgroundNoise { public: - enum BackgroundNoiseMode { - kBgnOn, // Default behavior with eternal noise. - kBgnFade, // Noise fades to zero after some time. - kBgnOff // Background noise is always zero. - }; - // TODO(hlundin): For 48 kHz support, increase kMaxLpcOrder to 10. // Will work anyway, but probably sound a little worse. static const int kMaxLpcOrder = 8; // 32000 / 8000 + 4. @@ -73,7 +68,11 @@ class BackgroundNoise { // Accessors. bool initialized() const { return initialized_; } - BackgroundNoiseMode mode() const { return mode_; } + NetEqBackgroundNoiseMode mode() const { return mode_; } + + // Sets the mode of the background noise playout for cases when there is a long + // duration of packet loss. + void set_mode(NetEqBackgroundNoiseMode mode) { mode_ = mode; } private: static const int kThresholdIncrement = 229; // 0.0035 in Q16. 
@@ -129,7 +128,7 @@ class BackgroundNoise { size_t num_channels_; scoped_array channel_parameters_; bool initialized_; - BackgroundNoiseMode mode_; + NetEqBackgroundNoiseMode mode_; DISALLOW_COPY_AND_ASSIGN(BackgroundNoise); }; diff --git a/webrtc/modules/audio_coding/neteq4/expand.cc b/webrtc/modules/audio_coding/neteq4/expand.cc index 2d6dfb5c1..3d9fa3845 100644 --- a/webrtc/modules/audio_coding/neteq4/expand.cc +++ b/webrtc/modules/audio_coding/neteq4/expand.cc @@ -294,8 +294,8 @@ int Expand::Process(AudioMultiVector* output) { // Unmute the background noise. int16_t bgn_mute_factor = background_noise_->MuteFactor(channel_ix); - BackgroundNoise::BackgroundNoiseMode bgn_mode = background_noise_->mode(); - if (bgn_mode == BackgroundNoise::kBgnFade && + NetEqBackgroundNoiseMode bgn_mode = background_noise_->mode(); + if (bgn_mode == kBgnFade && consecutive_expands_ >= kMaxConsecutiveExpands && bgn_mute_factor > 0) { // Fade BGN to zero. @@ -317,8 +317,8 @@ int Expand::Process(AudioMultiVector* output) { } else if (bgn_mute_factor < 16384) { // If mode is kBgnOff, or if kBgnFade has started fading, // Use regular |mute_slope|. - if (!stop_muting_ && bgn_mode != BackgroundNoise::kBgnOff && - !(bgn_mode == BackgroundNoise::kBgnFade && + if (!stop_muting_ && bgn_mode != kBgnOff && + !(bgn_mode == kBgnFade && consecutive_expands_ >= kMaxConsecutiveExpands)) { DspHelper::UnmuteSignal(noise_vector, static_cast(current_lag), &bgn_mute_factor, parameters.mute_slope, diff --git a/webrtc/modules/audio_coding/neteq4/interface/neteq.h b/webrtc/modules/audio_coding/neteq4/interface/neteq.h index 7c39cb1f4..547360b2e 100644 --- a/webrtc/modules/audio_coding/neteq4/interface/neteq.h +++ b/webrtc/modules/audio_coding/neteq4/interface/neteq.h @@ -66,9 +66,9 @@ enum NetEqPlayoutMode { }; enum NetEqBackgroundNoiseMode { - kBgnOn, - kBgnFade, - kBgnOff + kBgnOn, // Default behavior with eternal noise. + kBgnFade, // Noise fades to zero after some time. 
+ kBgnOff // Background noise is always zero. }; // This is the interface class for NetEq. @@ -241,14 +241,17 @@ class NetEq { // Get sequence number and timestamp of the latest RTP. // This method is to facilitate NACK. - virtual int DecodedRtpInfo(int* sequence_number, uint32_t* timestamp) = 0; + virtual int DecodedRtpInfo(int* sequence_number, + uint32_t* timestamp) const = 0; // Not implemented. virtual int InsertSyncPacket(const WebRtcRTPHeader& rtp_header, uint32_t receive_timestamp) = 0; + // Sets the background noise mode. virtual void SetBackgroundNoiseMode(NetEqBackgroundNoiseMode mode) = 0; + // Gets the background noise mode. virtual NetEqBackgroundNoiseMode BackgroundNoiseMode() const = 0; protected: diff --git a/webrtc/modules/audio_coding/neteq4/neteq_impl.cc b/webrtc/modules/audio_coding/neteq4/neteq_impl.cc index 7620a7f4d..d872b80eb 100644 --- a/webrtc/modules/audio_coding/neteq4/neteq_impl.cc +++ b/webrtc/modules/audio_coding/neteq4/neteq_impl.cc @@ -363,7 +363,7 @@ void NetEqImpl::PacketBufferStatistics(int* current_num_packets, current_memory_size_bytes, max_memory_size_bytes); } -int NetEqImpl::DecodedRtpInfo(int* sequence_number, uint32_t* timestamp) { +int NetEqImpl::DecodedRtpInfo(int* sequence_number, uint32_t* timestamp) const { CriticalSectionScoped lock(crit_sect_.get()); if (decoded_packet_sequence_number_ < 0) return -1; @@ -377,10 +377,16 @@ int NetEqImpl::InsertSyncPacket(const WebRtcRTPHeader& /* rtp_header */, return kNotImplemented; } -void NetEqImpl::SetBackgroundNoiseMode(NetEqBackgroundNoiseMode /* mode */) {} +void NetEqImpl::SetBackgroundNoiseMode(NetEqBackgroundNoiseMode mode) { + CriticalSectionScoped lock(crit_sect_.get()); + assert(background_noise_.get()); + background_noise_->set_mode(mode); +} NetEqBackgroundNoiseMode NetEqImpl::BackgroundNoiseMode() const { - return kBgnOn; + CriticalSectionScoped lock(crit_sect_.get()); + assert(background_noise_.get()); + return background_noise_->mode(); } // Methods below this 
line are private. @@ -1759,8 +1765,14 @@ void NetEqImpl::SetSampleRateAndChannels(int fs_hz, size_t channels) { // Delete sync buffer and create a new one. sync_buffer_.reset(new SyncBuffer(channels, kSyncBufferSize * fs_mult_)); - // Delete BackgroundNoise object and create a new one. + + // Delete BackgroundNoise object and create a new one, while preserving its + // mode. + NetEqBackgroundNoiseMode current_mode = kBgnOn; + if (background_noise_.get()) + current_mode = background_noise_->mode(); background_noise_.reset(new BackgroundNoise(channels)); + background_noise_->set_mode(current_mode); // Reset random vector. random_vector_.Reset(); diff --git a/webrtc/modules/audio_coding/neteq4/neteq_impl.h b/webrtc/modules/audio_coding/neteq4/neteq_impl.h index 60e644fe9..c0416809c 100644 --- a/webrtc/modules/audio_coding/neteq4/neteq_impl.h +++ b/webrtc/modules/audio_coding/neteq4/neteq_impl.h @@ -173,15 +173,17 @@ class NetEqImpl : public webrtc::NetEq { // Get sequence number and timestamp of the latest RTP. // This method is to facilitate NACK. - virtual int DecodedRtpInfo(int* sequence_number, uint32_t* timestamp); + virtual int DecodedRtpInfo(int* sequence_number, uint32_t* timestamp) const; + + // Sets background noise mode. + virtual void SetBackgroundNoiseMode(NetEqBackgroundNoiseMode mode); + + // Gets background noise mode. + virtual NetEqBackgroundNoiseMode BackgroundNoiseMode() const; virtual int InsertSyncPacket(const WebRtcRTPHeader& rtp_header, uint32_t receive_timestamp); - virtual void SetBackgroundNoiseMode(NetEqBackgroundNoiseMode mode); - - virtual NetEqBackgroundNoiseMode BackgroundNoiseMode() const; - private: static const int kOutputSizeMs = 10; static const int kMaxFrameSize = 2880; // 60 ms @ 48 kHz. 
diff --git a/webrtc/modules/audio_coding/neteq4/neteq_unittest.cc b/webrtc/modules/audio_coding/neteq4/neteq_unittest.cc index 5ab2d1f31..c6120ceae 100644 --- a/webrtc/modules/audio_coding/neteq4/neteq_unittest.cc +++ b/webrtc/modules/audio_coding/neteq4/neteq_unittest.cc @@ -17,11 +17,13 @@ #include #include // memset +#include #include #include #include "gtest/gtest.h" #include "webrtc/modules/audio_coding/neteq4/test/NETEQTEST_RTPpacket.h" +#include "webrtc/modules/audio_coding/codecs/pcm16b/include/pcm16b.h" #include "webrtc/test/testsupport/fileutils.h" #include "webrtc/test/testsupport/gtest_disable.h" #include "webrtc/typedefs.h" @@ -190,6 +192,8 @@ class NetEqDecodingTest : public ::testing::Test { uint8_t* payload, int* payload_len); + void CheckBgnOff(int sampling_rate, NetEqBackgroundNoiseMode bgn_mode); + NetEq* neteq_; FILE* rtp_fp_; unsigned int sim_clock_; @@ -375,6 +379,107 @@ void NetEqDecodingTest::PopulateCng(int frame_index, *payload_len = 1; // Only noise level, no spectral parameters. } +void NetEqDecodingTest::CheckBgnOff(int sampling_rate_hz, + NetEqBackgroundNoiseMode bgn_mode) { + int expected_samples_per_channel = 0; + uint8_t payload_type = 0xFF; // Invalid. + if (sampling_rate_hz == 8000) { + expected_samples_per_channel = kBlockSize8kHz; + payload_type = 93; // PCM 16, 8 kHz. + } else if (sampling_rate_hz == 16000) { + expected_samples_per_channel = kBlockSize16kHz; + payload_type = 94; // PCM 16, 16 kHz. + } else if (sampling_rate_hz == 32000) { + expected_samples_per_channel = kBlockSize32kHz; + payload_type = 95; // PCM 16, 32 kHz. + } else { + ASSERT_TRUE(false); // Unsupported test case. + } + + NetEqOutputType type; + int16_t output[kBlockSize32kHz]; // Maximum size is chosen. + int16_t input[kBlockSize32kHz]; // Maximum size is chosen. + + // Payload of 10 ms of PCM16 32 kHz. + uint8_t payload[kBlockSize32kHz * sizeof(int16_t)]; + + // Random payload. 
+ for (int n = 0; n < expected_samples_per_channel; ++n) { + input[n] = (rand() & ((1 << 10) - 1)) - ((1 << 5) - 1); + } + int enc_len_bytes = WebRtcPcm16b_EncodeW16( + input, expected_samples_per_channel, reinterpret_cast(payload)); + ASSERT_EQ(enc_len_bytes, expected_samples_per_channel * 2); + + WebRtcRTPHeader rtp_info; + PopulateRtpInfo(0, 0, &rtp_info); + rtp_info.header.payloadType = payload_type; + + int number_channels = 0; + int samples_per_channel = 0; + + uint32_t receive_timestamp = 0; + for (int n = 0; n < 10; ++n) { // Insert a few packets and get audio. + number_channels = 0; + samples_per_channel = 0; + ASSERT_EQ(0, neteq_->InsertPacket( + rtp_info, payload, enc_len_bytes, receive_timestamp)); + ASSERT_EQ(0, neteq_->GetAudio(kBlockSize32kHz, output, &samples_per_channel, + &number_channels, &type)); + ASSERT_EQ(1, number_channels); + ASSERT_EQ(expected_samples_per_channel, samples_per_channel); + ASSERT_EQ(kOutputNormal, type); + + // Next packet. + rtp_info.header.timestamp += expected_samples_per_channel; + rtp_info.header.sequenceNumber++; + receive_timestamp += expected_samples_per_channel; + } + + number_channels = 0; + samples_per_channel = 0; + + // Get audio without inserting packets, expecting PLC and PLC-to-CNG. Pull one + // frame without checking speech-type. This is the first frame pulled without + // inserting any packet, and might not be labeled as PLC. + ASSERT_EQ(0, neteq_->GetAudio(kBlockSize32kHz, output, &samples_per_channel, + &number_channels, &type)); + ASSERT_EQ(1, number_channels); + ASSERT_EQ(expected_samples_per_channel, samples_per_channel); + + // To be able to test the fading of background noise we need to pull at least + // 610 frames. + const int kFadingThreshold = 610; + + // Test several PLC-to-CNG frames for the expected behavior. The number 20 is + // arbitrary, but sufficiently large to test enough frames. 
+ const int kNumPlcToCngTestFrames = 20; + bool plc_to_cng = false; + for (int n = 0; n < kFadingThreshold + kNumPlcToCngTestFrames; ++n) { + number_channels = 0; + samples_per_channel = 0; + memset(output, 1, sizeof(output)); // Set to non-zero. + ASSERT_EQ(0, neteq_->GetAudio(kBlockSize32kHz, output, &samples_per_channel, + &number_channels, &type)); + ASSERT_EQ(1, number_channels); + ASSERT_EQ(expected_samples_per_channel, samples_per_channel); + if (type == kOutputPLCtoCNG) { + plc_to_cng = true; + double sum_squared = 0; + for (int k = 0; k < number_channels * samples_per_channel; ++k) + sum_squared += output[k] * output[k]; + if (bgn_mode == kBgnOn) { + EXPECT_NE(0, sum_squared); + } else if (bgn_mode == kBgnOff || n > kFadingThreshold) { + EXPECT_EQ(0, sum_squared); + } + } else { + EXPECT_EQ(kOutputPLC, type); + } + } + EXPECT_TRUE(plc_to_cng); // Just to be sure that PLC-to-CNG has occurred. +} + #if defined(_WIN32) && defined(WEBRTC_ARCH_64_BITS) // Disabled for Windows 64-bit until webrtc:1458 is fixed. #define MAYBE_TestBitExactness DISABLED_TestBitExactness @@ -731,4 +836,24 @@ TEST_F(NetEqDecodingTest, DISABLED_ON_ANDROID(GetAudioBeforeInsertPacket)) { EXPECT_EQ(0, out_data_[i]); } } + +TEST_F(NetEqDecodingTest, BackgroundNoise) { + neteq_->SetBackgroundNoiseMode(kBgnOn); + CheckBgnOff(8000, kBgnOn); + CheckBgnOff(16000, kBgnOn); + CheckBgnOff(32000, kBgnOn); + EXPECT_EQ(kBgnOn, neteq_->BackgroundNoiseMode()); + + neteq_->SetBackgroundNoiseMode(kBgnOff); + CheckBgnOff(8000, kBgnOff); + CheckBgnOff(16000, kBgnOff); + CheckBgnOff(32000, kBgnOff); + EXPECT_EQ(kBgnOff, neteq_->BackgroundNoiseMode()); + + neteq_->SetBackgroundNoiseMode(kBgnFade); + CheckBgnOff(8000, kBgnFade); + CheckBgnOff(16000, kBgnFade); + CheckBgnOff(32000, kBgnFade); + EXPECT_EQ(kBgnFade, neteq_->BackgroundNoiseMode()); +} } // namespace