Add API to set background noise mode.

Background noise mode.

BUG=
R=henrik.lundin@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/2194005

git-svn-id: http://webrtc.googlecode.com/svn/trunk@4835 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
turaj@webrtc.org 2013-09-25 00:07:27 +00:00
parent 8d757ac0a2
commit ff43c85ef1
6 changed files with 166 additions and 25 deletions

View File

@ -14,6 +14,7 @@
#include <string.h> // size_t
#include "webrtc/modules/audio_coding/neteq4/audio_multi_vector.h"
#include "webrtc/modules/audio_coding/neteq4/interface/neteq.h"
#include "webrtc/system_wrappers/interface/constructor_magic.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
#include "webrtc/typedefs.h"
@ -26,12 +27,6 @@ class PostDecodeVad;
// This class handles estimation of background noise parameters.
class BackgroundNoise {
public:
enum BackgroundNoiseMode {
kBgnOn, // Default behavior with eternal noise.
kBgnFade, // Noise fades to zero after some time.
kBgnOff // Background noise is always zero.
};
// TODO(hlundin): For 48 kHz support, increase kMaxLpcOrder to 10.
// Will work anyway, but probably sound a little worse.
static const int kMaxLpcOrder = 8; // 32000 / 8000 + 4.
@ -73,7 +68,11 @@ class BackgroundNoise {
// Accessors.
bool initialized() const { return initialized_; }
BackgroundNoiseMode mode() const { return mode_; }
NetEqBackgroundNoiseMode mode() const { return mode_; }
// Sets the mode of the background noise playout for cases when there is a long
// duration of packet loss.
void set_mode(NetEqBackgroundNoiseMode mode) { mode_ = mode; }
private:
static const int kThresholdIncrement = 229; // 0.0035 in Q16.
@ -129,7 +128,7 @@ class BackgroundNoise {
size_t num_channels_;
scoped_array<ChannelParameters> channel_parameters_;
bool initialized_;
BackgroundNoiseMode mode_;
NetEqBackgroundNoiseMode mode_;
DISALLOW_COPY_AND_ASSIGN(BackgroundNoise);
};

View File

@ -294,8 +294,8 @@ int Expand::Process(AudioMultiVector<int16_t>* output) {
// Unmute the background noise.
int16_t bgn_mute_factor = background_noise_->MuteFactor(channel_ix);
BackgroundNoise::BackgroundNoiseMode bgn_mode = background_noise_->mode();
if (bgn_mode == BackgroundNoise::kBgnFade &&
NetEqBackgroundNoiseMode bgn_mode = background_noise_->mode();
if (bgn_mode == kBgnFade &&
consecutive_expands_ >= kMaxConsecutiveExpands &&
bgn_mute_factor > 0) {
// Fade BGN to zero.
@ -317,8 +317,8 @@ int Expand::Process(AudioMultiVector<int16_t>* output) {
} else if (bgn_mute_factor < 16384) {
// If mode is kBgnOff, or if kBgnFade has started fading,
// Use regular |mute_slope|.
if (!stop_muting_ && bgn_mode != BackgroundNoise::kBgnOff &&
!(bgn_mode == BackgroundNoise::kBgnFade &&
if (!stop_muting_ && bgn_mode != kBgnOff &&
!(bgn_mode == kBgnFade &&
consecutive_expands_ >= kMaxConsecutiveExpands)) {
DspHelper::UnmuteSignal(noise_vector, static_cast<int>(current_lag),
&bgn_mute_factor, parameters.mute_slope,

View File

@ -66,9 +66,9 @@ enum NetEqPlayoutMode {
};
enum NetEqBackgroundNoiseMode {
kBgnOn,
kBgnFade,
kBgnOff
kBgnOn, // Default behavior with eternal noise.
kBgnFade, // Noise fades to zero after some time.
kBgnOff // Background noise is always zero.
};
// This is the interface class for NetEq.
@ -241,14 +241,17 @@ class NetEq {
// Get sequence number and timestamp of the latest RTP.
// This method is to facilitate NACK.
virtual int DecodedRtpInfo(int* sequence_number, uint32_t* timestamp) = 0;
virtual int DecodedRtpInfo(int* sequence_number,
uint32_t* timestamp) const = 0;
// Not implemented.
virtual int InsertSyncPacket(const WebRtcRTPHeader& rtp_header,
uint32_t receive_timestamp) = 0;
// Sets the background noise mode.
virtual void SetBackgroundNoiseMode(NetEqBackgroundNoiseMode mode) = 0;
// Gets the background noise mode.
virtual NetEqBackgroundNoiseMode BackgroundNoiseMode() const = 0;
protected:

View File

@ -363,7 +363,7 @@ void NetEqImpl::PacketBufferStatistics(int* current_num_packets,
current_memory_size_bytes, max_memory_size_bytes);
}
int NetEqImpl::DecodedRtpInfo(int* sequence_number, uint32_t* timestamp) {
int NetEqImpl::DecodedRtpInfo(int* sequence_number, uint32_t* timestamp) const {
CriticalSectionScoped lock(crit_sect_.get());
if (decoded_packet_sequence_number_ < 0)
return -1;
@ -377,10 +377,16 @@ int NetEqImpl::InsertSyncPacket(const WebRtcRTPHeader& /* rtp_header */,
return kNotImplemented;
}
void NetEqImpl::SetBackgroundNoiseMode(NetEqBackgroundNoiseMode /* mode */) {}
// Forwards |mode| to the current BackgroundNoise object. The critical section
// is held for the duration of the call.
void NetEqImpl::SetBackgroundNoiseMode(NetEqBackgroundNoiseMode mode) {
  CriticalSectionScoped lock(crit_sect_.get());
  BackgroundNoise* const bgn = background_noise_.get();
  // A BackgroundNoise object is expected to exist whenever this is called.
  assert(bgn);
  bgn->set_mode(mode);
}
NetEqBackgroundNoiseMode NetEqImpl::BackgroundNoiseMode() const {
return kBgnOn;
CriticalSectionScoped lock(crit_sect_.get());
assert(background_noise_.get());
return background_noise_->mode();
}
// Methods below this line are private.
@ -1759,8 +1765,14 @@ void NetEqImpl::SetSampleRateAndChannels(int fs_hz, size_t channels) {
// Delete sync buffer and create a new one.
sync_buffer_.reset(new SyncBuffer(channels, kSyncBufferSize * fs_mult_));
// Delete BackgroundNoise object and create a new one.
// Delete BackgroundNoise object and create a new one, while preserving its
// mode.
NetEqBackgroundNoiseMode current_mode = kBgnOn;
if (background_noise_.get())
current_mode = background_noise_->mode();
background_noise_.reset(new BackgroundNoise(channels));
background_noise_->set_mode(current_mode);
// Reset random vector.
random_vector_.Reset();

View File

@ -173,15 +173,17 @@ class NetEqImpl : public webrtc::NetEq {
// Get sequence number and timestamp of the latest RTP.
// This method is to facilitate NACK.
virtual int DecodedRtpInfo(int* sequence_number, uint32_t* timestamp);
virtual int DecodedRtpInfo(int* sequence_number, uint32_t* timestamp) const;
// Sets background noise mode.
virtual void SetBackgroundNoiseMode(NetEqBackgroundNoiseMode mode);
// Gets background noise mode.
virtual NetEqBackgroundNoiseMode BackgroundNoiseMode() const;
virtual int InsertSyncPacket(const WebRtcRTPHeader& rtp_header,
uint32_t receive_timestamp);
virtual void SetBackgroundNoiseMode(NetEqBackgroundNoiseMode mode);
virtual NetEqBackgroundNoiseMode BackgroundNoiseMode() const;
private:
static const int kOutputSizeMs = 10;
static const int kMaxFrameSize = 2880; // 60 ms @ 48 kHz.

View File

@ -17,11 +17,13 @@
#include <stdlib.h>
#include <string.h> // memset
#include <cmath>
#include <string>
#include <vector>
#include "gtest/gtest.h"
#include "webrtc/modules/audio_coding/neteq4/test/NETEQTEST_RTPpacket.h"
#include "webrtc/modules/audio_coding/codecs/pcm16b/include/pcm16b.h"
#include "webrtc/test/testsupport/fileutils.h"
#include "webrtc/test/testsupport/gtest_disable.h"
#include "webrtc/typedefs.h"
@ -190,6 +192,8 @@ class NetEqDecodingTest : public ::testing::Test {
uint8_t* payload,
int* payload_len);
void CheckBgnOff(int sampling_rate, NetEqBackgroundNoiseMode bgn_mode);
NetEq* neteq_;
FILE* rtp_fp_;
unsigned int sim_clock_;
@ -375,6 +379,107 @@ void NetEqDecodingTest::PopulateCng(int frame_index,
*payload_len = 1; // Only noise level, no spectral parameters.
}
// Checks the energy of the audio that NetEq produces during a long packet loss
// against what is expected for |bgn_mode|, at |sampling_rate_hz| (8000, 16000
// or 32000; any other value fails the test).
//
// The helper does not change the mode of |neteq_|; |bgn_mode| only selects
// which outcome is expected:
//   kBgnOn   - every PLC-to-CNG frame must have non-zero energy.
//   kBgnOff  - every PLC-to-CNG frame must be all zeros.
//   kBgnFade - PLC-to-CNG frames pulled after |kFadingThreshold| frames must
//              be all zeros.
void NetEqDecodingTest::CheckBgnOff(int sampling_rate_hz,
                                    NetEqBackgroundNoiseMode bgn_mode) {
  int expected_samples_per_channel = 0;
  uint8_t payload_type = 0xFF;  // Invalid.
  if (sampling_rate_hz == 8000) {
    expected_samples_per_channel = kBlockSize8kHz;
    payload_type = 93;  // PCM 16, 8 kHz.
  } else if (sampling_rate_hz == 16000) {
    expected_samples_per_channel = kBlockSize16kHz;
    payload_type = 94;  // PCM 16, 16 kHz.
  } else if (sampling_rate_hz == 32000) {
    expected_samples_per_channel = kBlockSize32kHz;
    payload_type = 95;  // PCM 16, 32 kHz.
  } else {
    // Unsupported test case; report the offending rate.
    ASSERT_TRUE(false) << "Unsupported sampling rate: " << sampling_rate_hz
                       << " Hz.";
  }

  NetEqOutputType type;
  int16_t output[kBlockSize32kHz];  // Maximum size is chosen.
  int16_t input[kBlockSize32kHz];  // Maximum size is chosen.

  // Large enough for a payload of 10 ms of PCM16 at 32 kHz.
  uint8_t payload[kBlockSize32kHz * sizeof(int16_t)];

  // Random payload, with sample values in the range [-31, 992].
  for (int n = 0; n < expected_samples_per_channel; ++n) {
    input[n] = (rand() & ((1 << 10) - 1)) - ((1 << 5) - 1);
  }
  int enc_len_bytes = WebRtcPcm16b_EncodeW16(
      input, expected_samples_per_channel, reinterpret_cast<int16_t*>(payload));
  ASSERT_EQ(enc_len_bytes, expected_samples_per_channel * 2);

  WebRtcRTPHeader rtp_info;
  PopulateRtpInfo(0, 0, &rtp_info);
  rtp_info.header.payloadType = payload_type;

  int number_channels = 0;
  int samples_per_channel = 0;

  uint32_t receive_timestamp = 0;
  for (int n = 0; n < 10; ++n) {  // Insert a few packets and get audio.
    number_channels = 0;
    samples_per_channel = 0;
    ASSERT_EQ(0, neteq_->InsertPacket(
        rtp_info, payload, enc_len_bytes, receive_timestamp));
    ASSERT_EQ(0, neteq_->GetAudio(kBlockSize32kHz, output, &samples_per_channel,
                                  &number_channels, &type));
    ASSERT_EQ(1, number_channels);
    ASSERT_EQ(expected_samples_per_channel, samples_per_channel);
    ASSERT_EQ(kOutputNormal, type);

    // Next packet.
    rtp_info.header.timestamp += expected_samples_per_channel;
    rtp_info.header.sequenceNumber++;
    receive_timestamp += expected_samples_per_channel;
  }

  number_channels = 0;
  samples_per_channel = 0;

  // Get audio without inserting packets, expecting PLC and PLC-to-CNG. Pull one
  // frame without checking speech-type. This is the first frame pulled without
  // inserting any packet, and might not be labeled as PLC.
  ASSERT_EQ(0, neteq_->GetAudio(kBlockSize32kHz, output, &samples_per_channel,
                                &number_channels, &type));
  ASSERT_EQ(1, number_channels);
  ASSERT_EQ(expected_samples_per_channel, samples_per_channel);

  // To be able to test the fading of background noise we need to pull at least
  // 610 frames.
  const int kFadingThreshold = 610;

  // Test several PLC-to-CNG frames for the expected behavior. The number 20 is
  // arbitrary, but sufficiently large to test enough frames.
  const int kNumPlcToCngTestFrames = 20;
  bool plc_to_cng = false;
  for (int n = 0; n < kFadingThreshold + kNumPlcToCngTestFrames; ++n) {
    number_channels = 0;
    samples_per_channel = 0;
    // Pre-fill with non-zero bytes, so that an all-zero frame can only be the
    // result of GetAudio() writing zeros.
    memset(output, 1, sizeof(output));
    ASSERT_EQ(0, neteq_->GetAudio(kBlockSize32kHz, output, &samples_per_channel,
                                  &number_channels, &type));
    ASSERT_EQ(1, number_channels);
    ASSERT_EQ(expected_samples_per_channel, samples_per_channel);
    if (type == kOutputPLCtoCNG) {
      plc_to_cng = true;
      // Energy of the frame; zero if and only if all samples are zero.
      double sum_squared = 0;
      for (int k = 0; k < number_channels * samples_per_channel; ++k)
        sum_squared += output[k] * output[k];
      if (bgn_mode == kBgnOn) {
        EXPECT_NE(0, sum_squared) << "Silent PLC-to-CNG frame, n = " << n;
      } else if (bgn_mode == kBgnOff || n > kFadingThreshold) {
        // Either noise is disabled, or (in fade mode) it must have faded out.
        EXPECT_EQ(0, sum_squared) << "Non-silent PLC-to-CNG frame, n = " << n;
      }
    } else {
      EXPECT_EQ(kOutputPLC, type) << "Unexpected output type, n = " << n;
    }
  }
  EXPECT_TRUE(plc_to_cng);  // Just to be sure that PLC-to-CNG has occurred.
}
#if defined(_WIN32) && defined(WEBRTC_ARCH_64_BITS)
// Disabled for Windows 64-bit until webrtc:1458 is fixed.
#define MAYBE_TestBitExactness DISABLED_TestBitExactness
@ -731,4 +836,24 @@ TEST_F(NetEqDecodingTest, DISABLED_ON_ANDROID(GetAudioBeforeInsertPacket)) {
EXPECT_EQ(0, out_data_[i]);
}
}
// For each background noise mode: set it on NetEq, run the background noise
// check at 8, 16 and 32 kHz, and verify that the mode read back from NetEq is
// the one that was set.
TEST_F(NetEqDecodingTest, BackgroundNoise) {
  const NetEqBackgroundNoiseMode kModes[] = {kBgnOn, kBgnOff, kBgnFade};
  const int kSampleRatesHz[] = {8000, 16000, 32000};
  const size_t kNumModes = sizeof(kModes) / sizeof(kModes[0]);
  const size_t kNumRates = sizeof(kSampleRatesHz) / sizeof(kSampleRatesHz[0]);

  for (size_t mode_ix = 0; mode_ix < kNumModes; ++mode_ix) {
    const NetEqBackgroundNoiseMode mode = kModes[mode_ix];
    neteq_->SetBackgroundNoiseMode(mode);
    for (size_t rate_ix = 0; rate_ix < kNumRates; ++rate_ix) {
      CheckBgnOff(kSampleRatesHz[rate_ix], mode);
    }
    // The mode must be unchanged after the checks at all sample rates.
    EXPECT_EQ(mode, neteq_->BackgroundNoiseMode());
  }
}
} // namespace