From bdebccf384db689931ba3df9afbcd59c85ddb8e2 Mon Sep 17 00:00:00 2001 From: "henrik.lundin@webrtc.org" Date: Thu, 29 Jan 2015 14:10:32 +0000 Subject: [PATCH] Fix a number of things in AudioEncoderDecoderIsac* - Add max_bit_rate and max_payload_size_bytes to config structs. - Fix support for 48 kHz sample rate. - Fix iSAC-RED. - Add method UpdateDecoderSampleRate(). - Update locking structure with a separate lock for local member variables used by the encoder methods. BUG=3926 COAUTHOR:kwiberg@webrtc.org R=minyue@webrtc.org Review URL: https://webrtc-codereview.appspot.com/41659004 Cr-Commit-Position: refs/heads/master@{#8204} git-svn-id: http://webrtc.googlecode.com/svn/trunk@8204 4adac7df-926f-26a2-2b94-8c16560cd09d --- .../codecs/isac/audio_encoder_isac_t.h | 50 +++++-- .../codecs/isac/audio_encoder_isac_t_impl.h | 137 ++++++++++++++---- .../fix/interface/audio_encoder_isacfix.h | 9 +- .../isac/main/interface/audio_encoder_isac.h | 9 +- .../source/audio_encoder_isac_red_unittest.cc | 4 + 5 files changed, 165 insertions(+), 44 deletions(-) diff --git a/webrtc/modules/audio_coding/codecs/isac/audio_encoder_isac_t.h b/webrtc/modules/audio_coding/codecs/isac/audio_encoder_isac_t.h index 712d3e69d..65a120449 100644 --- a/webrtc/modules/audio_coding/codecs/isac/audio_encoder_isac_t.h +++ b/webrtc/modules/audio_coding/codecs/isac/audio_encoder_isac_t.h @@ -29,34 +29,45 @@ class AudioEncoderDecoderIsacT : public AudioEncoder, public AudioDecoder { // are // - 16000 Hz, 30 ms, 10000-32000 bps // - 16000 Hz, 60 ms, 10000-32000 bps - // - 32000 Hz, 30 ms, 10000-56000 bps (if T has 32 kHz support) + // - 32000 Hz, 30 ms, 10000-56000 bps (if T has super-wideband support) + // - 48000 Hz, 30 ms, 10000-56000 bps (if T has super-wideband support) struct Config { Config(); bool IsOk() const; int payload_type; + int red_payload_type; int sample_rate_hz; int frame_size_ms; int bit_rate; // Limit on the short-term average bit rate, in bits/second. + int max_bit_rate; + int max_payload_size_bytes; }; - // For constructing an encoder in channel-adaptive mode. The sample rate must - // be 16000 Hz; the initial frame size can be 30 or 60 ms; and the initial - // bit rate can be 10000-56000 bps if T has 32 kHz support, 10000-32000 bps - // otherwise. + // For constructing an encoder in channel-adaptive mode. Allowed combinations + // are + // - 16000 Hz, 30 ms, 10000-32000 bps + // - 16000 Hz, 60 ms, 10000-32000 bps + // - 32000 Hz, 30 ms, 10000-56000 bps (if T has super-wideband support) + // - 48000 Hz, 30 ms, 10000-56000 bps (if T has super-wideband support) struct ConfigAdaptive { ConfigAdaptive(); bool IsOk() const; int payload_type; + int red_payload_type; int sample_rate_hz; int initial_frame_size_ms; int initial_bit_rate; + int max_bit_rate; bool enforce_frame_size; // Prevent adaptive changes to the frame size? + int max_payload_size_bytes; }; explicit AudioEncoderDecoderIsacT(const Config& config); explicit AudioEncoderDecoderIsacT(const ConfigAdaptive& config); virtual ~AudioEncoderDecoderIsacT() OVERRIDE; + void UpdateDecoderSampleRate(int sample_rate_hz); + // AudioEncoder public methods. virtual int sample_rate_hz() const OVERRIDE; virtual int num_channels() const OVERRIDE; @@ -91,24 +102,39 @@ class AudioEncoderDecoderIsacT : public AudioEncoder, public AudioDecoder { EncodedInfo* info) OVERRIDE; private: + // This value is taken from STREAM_SIZE_MAX_60 for iSAC float (60 ms) and + // STREAM_MAXW16_60MS for iSAC fix (60 ms). + static const size_t kSufficientEncodeBufferSizeBytes = 400; + const int payload_type_; + const int red_payload_type_; // iSAC encoder/decoder state, guarded by a mutex to ensure that encode calls // from one thread won't clash with decode calls from another thread. + // Note: PT_GUARDED_BY is disabled since it is not yet supported by clang. + const scoped_ptr state_lock_; + typename T::instance_type* isac_state_ + GUARDED_BY(state_lock_) /* PT_GUARDED_BY(lock_)*/; + + // Must be acquired before state_lock_. const scoped_ptr lock_; - typename T::instance_type* isac_state_ GUARDED_BY(lock_); // Have we accepted input but not yet emitted it in a packet? - bool packet_in_progress_; - - // Working on the very first output frame. - bool first_output_frame_; + bool packet_in_progress_ GUARDED_BY(lock_); // Timestamp of the first input of the currently in-progress packet. - uint32_t packet_timestamp_; + uint32_t packet_timestamp_ GUARDED_BY(lock_); // Timestamp of the previously encoded packet. - uint32_t last_encoded_timestamp_; + uint32_t last_encoded_timestamp_ GUARDED_BY(lock_); + + // Redundant encoding from last time. + // Note: If has_redundant_encoder is false, we set the array length to 1, + // since zero-length arrays are not supported by all compilers. + uint8_t redundant_payload_[T::has_redundant_encoder + ? kSufficientEncodeBufferSizeBytes + : 1] GUARDED_BY(lock_); + size_t redundant_length_bytes_ GUARDED_BY(lock_); DISALLOW_COPY_AND_ASSIGN(AudioEncoderDecoderIsacT); }; diff --git a/webrtc/modules/audio_coding/codecs/isac/audio_encoder_isac_t_impl.h b/webrtc/modules/audio_coding/codecs/isac/audio_encoder_isac_t_impl.h index 5b337c165..b45fb265f 100644 --- a/webrtc/modules/audio_coding/codecs/isac/audio_encoder_isac_t_impl.h +++ b/webrtc/modules/audio_coding/codecs/isac/audio_encoder_isac_t_impl.h @@ -13,29 +13,49 @@ #include "webrtc/modules/audio_coding/codecs/isac/main/interface/audio_encoder_isac.h" +#include + +#include "webrtc/base/checks.h" #include "webrtc/modules/audio_coding/codecs/isac/main/interface/isac.h" #include "webrtc/system_wrappers/interface/critical_section_wrapper.h" namespace webrtc { const int kIsacPayloadType = 103; +const int kInvalidPayloadType = -1; template AudioEncoderDecoderIsacT::Config::Config() : payload_type(kIsacPayloadType), + red_payload_type(kInvalidPayloadType), sample_rate_hz(16000), frame_size_ms(30), - bit_rate(32000) { + bit_rate(32000), + max_bit_rate(-1), + max_payload_size_bytes(-1) { } template bool AudioEncoderDecoderIsacT::Config::IsOk() const { + if (max_bit_rate < 32000 && max_bit_rate != -1) + return false; + if (max_payload_size_bytes < 120 && max_payload_size_bytes != -1) + return false; switch (sample_rate_hz) { case 16000: + if (max_bit_rate > 53400) + return false; + if (max_payload_size_bytes > 400) + return false; return (frame_size_ms == 30 || frame_size_ms == 60) && bit_rate >= 10000 && bit_rate <= 32000; case 32000: - return T::has_32kHz && + case 48000: + if (max_bit_rate > 160000) + return false; + if (max_payload_size_bytes > 600) + return false; + return T::has_swb && (frame_size_ms == 30 && bit_rate >= 10000 && bit_rate <= 56000); default: return false; @@ -45,41 +65,76 @@ bool AudioEncoderDecoderIsacT::Config::IsOk() const { template AudioEncoderDecoderIsacT::ConfigAdaptive::ConfigAdaptive() : payload_type(kIsacPayloadType), + red_payload_type(kInvalidPayloadType), sample_rate_hz(16000), initial_frame_size_ms(30), initial_bit_rate(32000), - enforce_frame_size(false) { + max_bit_rate(-1), + enforce_frame_size(false), + max_payload_size_bytes(-1) { } template bool AudioEncoderDecoderIsacT::ConfigAdaptive::IsOk() const { - static const int max_rate = T::has_32kHz ? 56000 : 32000; - return sample_rate_hz == 16000 && - (initial_frame_size_ms == 30 || initial_frame_size_ms == 60) && - initial_bit_rate >= 10000 && initial_bit_rate <= max_rate; + if (max_bit_rate < 32000 && max_bit_rate != -1) + return false; + if (max_payload_size_bytes < 120 && max_payload_size_bytes != -1) + return false; + switch (sample_rate_hz) { + case 16000: + if (max_bit_rate > 53400) + return false; + if (max_payload_size_bytes > 400) + return false; + return (initial_frame_size_ms == 30 || initial_frame_size_ms == 60) && + initial_bit_rate >= 10000 && initial_bit_rate <= 32000; + case 32000: + case 48000: + if (max_bit_rate > 160000) + return false; + if (max_payload_size_bytes > 600) + return false; + return T::has_swb && + (initial_frame_size_ms == 30 && initial_bit_rate >= 10000 && + initial_bit_rate <= 56000); + default: + return false; + } } template AudioEncoderDecoderIsacT::AudioEncoderDecoderIsacT(const Config& config) : payload_type_(config.payload_type), + red_payload_type_(config.red_payload_type), + state_lock_(CriticalSectionWrapper::CreateCriticalSection()), lock_(CriticalSectionWrapper::CreateCriticalSection()), packet_in_progress_(false), - first_output_frame_(true) { + redundant_length_bytes_(0) { CHECK(config.IsOk()); CHECK_EQ(0, T::Create(&isac_state_)); CHECK_EQ(0, T::EncoderInit(isac_state_, 1)); CHECK_EQ(0, T::SetEncSampRate(isac_state_, config.sample_rate_hz)); CHECK_EQ(0, T::Control(isac_state_, config.bit_rate, config.frame_size_ms)); - CHECK_EQ(0, T::SetDecSampRate(isac_state_, config.sample_rate_hz)); + // When config.sample_rate_hz is set to 48000 Hz (iSAC-fb), the decoder is + // still set to 32000 Hz, since there is no full-band mode in the decoder. + CHECK_EQ(0, T::SetDecSampRate(isac_state_, + std::min(config.sample_rate_hz, 32000))); + if (config.max_payload_size_bytes != -1) + CHECK_EQ(0, + T::SetMaxPayloadSize(isac_state_, config.max_payload_size_bytes)); + if (config.max_bit_rate != -1) + CHECK_EQ(0, T::SetMaxRate(isac_state_, config.max_bit_rate)); } template AudioEncoderDecoderIsacT::AudioEncoderDecoderIsacT( const ConfigAdaptive& config) : payload_type_(config.payload_type), + red_payload_type_(config.red_payload_type), + state_lock_(CriticalSectionWrapper::CreateCriticalSection()), lock_(CriticalSectionWrapper::CreateCriticalSection()), packet_in_progress_(false), - first_output_frame_(true) { + redundant_length_bytes_(0) { CHECK(config.IsOk()); CHECK_EQ(0, T::Create(&isac_state_)); CHECK_EQ(0, T::EncoderInit(isac_state_, 0)); @@ -88,6 +143,11 @@ AudioEncoderDecoderIsacT::AudioEncoderDecoderIsacT( config.initial_frame_size_ms, config.enforce_frame_size)); CHECK_EQ(0, T::SetDecSampRate(isac_state_, config.sample_rate_hz)); + if (config.max_payload_size_bytes != -1) + CHECK_EQ(0, + T::SetMaxPayloadSize(isac_state_, config.max_payload_size_bytes)); + if (config.max_bit_rate != -1) + CHECK_EQ(0, T::SetMaxRate(isac_state_, config.max_bit_rate)); } template @@ -95,9 +155,15 @@ AudioEncoderDecoderIsacT::~AudioEncoderDecoderIsacT() { CHECK_EQ(0, T::Free(isac_state_)); } +template +void AudioEncoderDecoderIsacT::UpdateDecoderSampleRate(int sample_rate_hz) { + CriticalSectionScoped cs(state_lock_.get()); + CHECK_EQ(0, T::SetDecSampRate(isac_state_, sample_rate_hz)); +} + template int AudioEncoderDecoderIsacT::sample_rate_hz() const { - CriticalSectionScoped cs(lock_.get()); + CriticalSectionScoped cs(state_lock_.get()); return T::EncSampRate(isac_state_); } @@ -108,7 +174,7 @@ int AudioEncoderDecoderIsacT::num_channels() const { template int AudioEncoderDecoderIsacT::Num10MsFramesInNextPacket() const { - CriticalSectionScoped cs(lock_.get()); + CriticalSectionScoped cs(state_lock_.get()); const int samples_in_next_packet = T::GetNewFrameLen(isac_state_); return rtc::CheckedDivExact(samples_in_next_packet, rtc::CheckedDivExact(sample_rate_hz(), 100)); @@ -125,6 +191,7 @@ bool AudioEncoderDecoderIsacT::EncodeInternal(uint32_t rtp_timestamp, size_t max_encoded_bytes, uint8_t* encoded, EncodedInfo* info) { + CriticalSectionScoped cs(lock_.get()); if (!packet_in_progress_) { // Starting a new packet; remember the timestamp for later. packet_in_progress_ = true; @@ -132,7 +199,7 @@ bool AudioEncoderDecoderIsacT::EncodeInternal(uint32_t rtp_timestamp, } int r; { - CriticalSectionScoped cs(lock_.get()); + CriticalSectionScoped cs(state_lock_.get()); r = T::Encode(isac_state_, audio, encoded); } if (r < 0) { @@ -158,30 +225,40 @@ bool AudioEncoderDecoderIsacT::EncodeInternal(uint32_t rtp_timestamp, if (!T::has_redundant_encoder) return true; - if (first_output_frame_) { + if (redundant_length_bytes_ == 0) { // Do not emit the first output frame when using redundant encoding. info->encoded_bytes = 0; - first_output_frame_ = false; } else { - // Call the encoder's method to get redundant encoding. + // When a redundant payload from the last Encode call is available, the + // resulting payload consists of the primary encoding followed by the + // redundant encoding from last time. const size_t primary_length = info->encoded_bytes; - int16_t secondary_len; - { - CriticalSectionScoped cs(lock_.get()); - secondary_len = T::GetRedPayload(isac_state_, &encoded[primary_length]); - } - DCHECK_GE(secondary_len, 0); + memcpy(&encoded[primary_length], redundant_payload_, + redundant_length_bytes_); + // The EncodedInfo struct |info| will have one root node and two leaves. // |info| will be implicitly cast to an EncodedInfoLeaf struct, effectively // discarding the (empty) vector of redundant information. This is // intentional. info->redundant.push_back(*info); EncodedInfoLeaf secondary_info; secondary_info.payload_type = info->payload_type; - secondary_info.encoded_bytes = secondary_len; + secondary_info.encoded_bytes = redundant_length_bytes_; secondary_info.encoded_timestamp = last_encoded_timestamp_; info->redundant.push_back(secondary_info); - info->encoded_bytes += secondary_len; // Sum of primary and secondary. + info->encoded_bytes += + redundant_length_bytes_; // Sum of primary and secondary. + DCHECK_NE(red_payload_type_, kInvalidPayloadType) + << "Config.red_payload_type must be set for " + "AudioEncoderDecoderIsacRed."; + info->payload_type = red_payload_type_; } + { + CriticalSectionScoped cs(state_lock_.get()); + // Call the encoder's method to get redundant encoding. + redundant_length_bytes_ = T::GetRedPayload(isac_state_, redundant_payload_); + } + DCHECK_LE(redundant_length_bytes_, sizeof(redundant_payload_)); + DCHECK_GE(redundant_length_bytes_, 0u); last_encoded_timestamp_ = packet_timestamp_; return true; } @@ -191,7 +268,7 @@ int AudioEncoderDecoderIsacT::Decode(const uint8_t* encoded, size_t encoded_len, int16_t* decoded, SpeechType* speech_type) { - CriticalSectionScoped cs(lock_.get()); + CriticalSectionScoped cs(state_lock_.get()); int16_t temp_type = 1; // Default is speech. int16_t ret = T::Decode(isac_state_, encoded, static_cast(encoded_len), @@ -205,7 +282,7 @@ int AudioEncoderDecoderIsacT::DecodeRedundant(const uint8_t* encoded, size_t encoded_len, int16_t* decoded, SpeechType* speech_type) { - CriticalSectionScoped cs(lock_.get()); + CriticalSectionScoped cs(state_lock_.get()); int16_t temp_type = 1; // Default is speech. int16_t ret = T::DecodeRcu(isac_state_, encoded, static_cast(encoded_len), @@ -221,13 +298,13 @@ bool AudioEncoderDecoderIsacT::HasDecodePlc() const { template int AudioEncoderDecoderIsacT::DecodePlc(int num_frames, int16_t* decoded) { - CriticalSectionScoped cs(lock_.get()); + CriticalSectionScoped cs(state_lock_.get()); return T::DecodePlc(isac_state_, decoded, num_frames); } template int AudioEncoderDecoderIsacT::Init() { - CriticalSectionScoped cs(lock_.get()); + CriticalSectionScoped cs(state_lock_.get()); return T::DecoderInit(isac_state_); } @@ -237,7 +314,7 @@ int AudioEncoderDecoderIsacT::IncomingPacket(const uint8_t* payload, uint16_t rtp_sequence_number, uint32_t rtp_timestamp, uint32_t arrival_timestamp) { - CriticalSectionScoped cs(lock_.get()); + CriticalSectionScoped cs(state_lock_.get()); return T::UpdateBwEstimate( isac_state_, payload, static_cast(payload_len), rtp_sequence_number, rtp_timestamp, arrival_timestamp); @@ -245,7 +322,7 @@ int AudioEncoderDecoderIsacT::IncomingPacket(const uint8_t* payload, template int AudioEncoderDecoderIsacT::ErrorCode() { - CriticalSectionScoped cs(lock_.get()); + CriticalSectionScoped cs(state_lock_.get()); return T::GetErrorCode(isac_state_); } diff --git a/webrtc/modules/audio_coding/codecs/isac/fix/interface/audio_encoder_isacfix.h b/webrtc/modules/audio_coding/codecs/isac/fix/interface/audio_encoder_isacfix.h index 944152be7..4982e5a8d 100644 --- a/webrtc/modules/audio_coding/codecs/isac/fix/interface/audio_encoder_isacfix.h +++ b/webrtc/modules/audio_coding/codecs/isac/fix/interface/audio_encoder_isacfix.h @@ -18,7 +18,7 @@ namespace webrtc { struct IsacFix { typedef ISACFIX_MainStruct instance_type; - static const bool has_32kHz = false; + static const bool has_swb = false; static const bool has_redundant_encoder = false; static const uint16_t kFixSampleRate = 16000; static inline int16_t Control(instance_type* inst, @@ -105,6 +105,13 @@ struct IsacFix { FATAL() << "Should never be called."; return -1; } + static inline int16_t SetMaxPayloadSize(instance_type* inst, + int16_t max_payload_size_bytes) { + return WebRtcIsacfix_SetMaxPayloadSize(inst, max_payload_size_bytes); + } + static inline int16_t SetMaxRate(instance_type* inst, int32_t max_bit_rate) { + return WebRtcIsacfix_SetMaxRate(inst, max_bit_rate); + } }; typedef AudioEncoderDecoderIsacT AudioEncoderDecoderIsacFix; diff --git a/webrtc/modules/audio_coding/codecs/isac/main/interface/audio_encoder_isac.h b/webrtc/modules/audio_coding/codecs/isac/main/interface/audio_encoder_isac.h index bcfe222c2..41ed490d7 100644 --- a/webrtc/modules/audio_coding/codecs/isac/main/interface/audio_encoder_isac.h +++ b/webrtc/modules/audio_coding/codecs/isac/main/interface/audio_encoder_isac.h @@ -18,7 +18,7 @@ namespace webrtc { struct IsacFloat { typedef ISACStruct instance_type; - static const bool has_32kHz = true; + static const bool has_swb = true; static const bool has_redundant_encoder = false; static inline int16_t Control(instance_type* inst, int32_t rate, @@ -102,6 +102,13 @@ struct IsacFloat { FATAL() << "Should never be called."; return -1; } + static inline int16_t SetMaxPayloadSize(instance_type* inst, + int16_t max_payload_size_bytes) { + return WebRtcIsac_SetMaxPayloadSize(inst, max_payload_size_bytes); + } + static inline int16_t SetMaxRate(instance_type* inst, int32_t max_bit_rate) { + return WebRtcIsac_SetMaxRate(inst, max_bit_rate); + } }; typedef AudioEncoderDecoderIsacT AudioEncoderDecoderIsac; diff --git a/webrtc/modules/audio_coding/codecs/isac/main/source/audio_encoder_isac_red_unittest.cc b/webrtc/modules/audio_coding/codecs/isac/main/source/audio_encoder_isac_red_unittest.cc index fb6cadf47..7ff1b46d6 100644 --- a/webrtc/modules/audio_coding/codecs/isac/main/source/audio_encoder_isac_red_unittest.cc +++ b/webrtc/modules/audio_coding/codecs/isac/main/source/audio_encoder_isac_red_unittest.cc @@ -22,6 +22,7 @@ namespace webrtc { TEST(AudioEncoderIsacRedTest, CompareRedAndNoRed) { static const int kSampleRateHz = 16000; static const int k10MsSamples = kSampleRateHz / 100; + static const int kRedPayloadType = 100; // Fill the input array with pseudo-random noise in the range [-1000, 1000]. int16_t input[k10MsSamples]; srand(1418811752); @@ -37,6 +38,9 @@ TEST(AudioEncoderIsacRedTest, CompareRedAndNoRed) { AudioEncoderDecoderIsac isac_encoder(config); AudioEncoderDecoderIsacRed::Config red_config; red_config.sample_rate_hz = kSampleRateHz; + red_config.red_payload_type = kRedPayloadType; + ASSERT_NE(red_config.red_payload_type, red_config.payload_type) + << "iSAC and RED payload types must be different."; AudioEncoderDecoderIsacRed isac_red_encoder(red_config); AudioEncoder::EncodedInfo info, red_info;