Fix a number of things in AudioEncoderDecoderIsac*

- Add max_bit_rate and max_payload_size_bytes to config structs.
- Fix support for 48 kHz sample rate.
- Fix iSAC-RED.
- Add method UpdateDecoderSampleRate().
- Update locking structure with a separate lock for local member
variables used by the encoder methods.

BUG=3926
COAUTHOR:kwiberg@webrtc.org

R=minyue@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/41659004

Cr-Commit-Position: refs/heads/master@{#8204}
git-svn-id: http://webrtc.googlecode.com/svn/trunk@8204 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
henrik.lundin@webrtc.org 2015-01-29 14:10:32 +00:00
parent 18e758526d
commit bdebccf384
5 changed files with 165 additions and 44 deletions

View File

@ -29,34 +29,45 @@ class AudioEncoderDecoderIsacT : public AudioEncoder, public AudioDecoder {
// are
// - 16000 Hz, 30 ms, 10000-32000 bps
// - 16000 Hz, 60 ms, 10000-32000 bps
// - 32000 Hz, 30 ms, 10000-56000 bps (if T has 32 kHz support)
// - 32000 Hz, 30 ms, 10000-56000 bps (if T has super-wideband support)
// - 48000 Hz, 30 ms, 10000-56000 bps (if T has super-wideband support)
// Settings consumed by the AudioEncoderDecoderIsacT(const Config&)
// constructor, which CHECKs IsOk() before applying them.
struct Config {
// Fills in the defaults: kIsacPayloadType, 16000 Hz, 30 ms frames, 32000 bps,
// and "not set" for red_payload_type (kInvalidPayloadType), max_bit_rate (-1)
// and max_payload_size_bytes (-1).
Config();
// Returns true iff sample_rate_hz, frame_size_ms and bit_rate form one of the
// allowed combinations and each optional limit is either -1 or in range.
bool IsOk() const;
int payload_type;
// Payload type stamped on packets that carry a redundant encoding; only read
// on the T::has_redundant_encoder path of EncodeInternal().
int red_payload_type;
// Encoder sample rate. IsOk() accepts 16000, and 32000 or 48000 when
// T::has_swb is true.
int sample_rate_hz;
int frame_size_ms;
int bit_rate; // Limit on the short-term average bit rate, in bits/second.
// Optional cap forwarded to T::SetMaxRate(); -1 means "do not call it".
// IsOk() requires -1 or a value >= 32000 and <= 53400 (16 kHz) or
// <= 160000 (32/48 kHz). Presumably in bits/second, like bit_rate.
int max_bit_rate;
// Optional cap forwarded to T::SetMaxPayloadSize(); -1 means "do not call
// it". IsOk() requires -1 or a value >= 120 and <= 400 (16 kHz) or
// <= 600 (32/48 kHz).
int max_payload_size_bytes;
};
// For constructing an encoder in channel-adaptive mode. The sample rate must
// be 16000 Hz; the initial frame size can be 30 or 60 ms; and the initial
// bit rate can be 10000-56000 bps if T has 32 kHz support, 10000-32000 bps
// otherwise.
// For constructing an encoder in channel-adaptive mode. Allowed combinations
// are
// - 16000 Hz, 30 ms, 10000-32000 bps
// - 16000 Hz, 60 ms, 10000-32000 bps
// - 32000 Hz, 30 ms, 10000-56000 bps (if T has super-wideband support)
// - 48000 Hz, 30 ms, 10000-56000 bps (if T has super-wideband support)
// Settings consumed by the AudioEncoderDecoderIsacT(const ConfigAdaptive&)
// constructor, which CHECKs IsOk() before applying them.
struct ConfigAdaptive {
// Fills in the defaults: kIsacPayloadType, 16000 Hz, 30 ms initial frames,
// 32000 bps initial rate, enforce_frame_size = false, and "not set" for
// red_payload_type (kInvalidPayloadType), max_bit_rate (-1) and
// max_payload_size_bytes (-1).
ConfigAdaptive();
// Returns true iff sample_rate_hz, initial_frame_size_ms and initial_bit_rate
// form one of the allowed combinations and each optional limit is either -1
// or in range.
bool IsOk() const;
int payload_type;
// Payload type stamped on packets that carry a redundant encoding; only read
// on the T::has_redundant_encoder path of EncodeInternal().
int red_payload_type;
// Encoder sample rate. IsOk() accepts 16000, and 32000 or 48000 when
// T::has_swb is true.
int sample_rate_hz;
// Starting frame size. IsOk() allows 30 or 60 at 16 kHz and only 30 at
// 32/48 kHz.
int initial_frame_size_ms;
// Starting bit rate. IsOk() allows 10000-32000 at 16 kHz and 10000-56000 at
// 32/48 kHz.
int initial_bit_rate;
// Optional cap forwarded to T::SetMaxRate(); -1 means "do not call it".
// IsOk() requires -1 or a value >= 32000 and <= 53400 (16 kHz) or
// <= 160000 (32/48 kHz). Presumably in bits/second.
int max_bit_rate;
bool enforce_frame_size; // Prevent adaptive changes to the frame size?
// Optional cap forwarded to T::SetMaxPayloadSize(); -1 means "do not call
// it". IsOk() requires -1 or a value >= 120 and <= 400 (16 kHz) or
// <= 600 (32/48 kHz).
int max_payload_size_bytes;
};
explicit AudioEncoderDecoderIsacT(const Config& config);
explicit AudioEncoderDecoderIsacT(const ConfigAdaptive& config);
virtual ~AudioEncoderDecoderIsacT() OVERRIDE;
void UpdateDecoderSampleRate(int sample_rate_hz);
// AudioEncoder public methods.
virtual int sample_rate_hz() const OVERRIDE;
virtual int num_channels() const OVERRIDE;
@ -91,24 +102,39 @@ class AudioEncoderDecoderIsacT : public AudioEncoder, public AudioDecoder {
EncodedInfo* info) OVERRIDE;
private:
// This value is taken from STREAM_SIZE_MAX_60 for iSAC float (60 ms) and
// STREAM_MAXW16_60MS for iSAC fix (60 ms).
static const size_t kSufficientEncodeBufferSizeBytes = 400;
const int payload_type_;
const int red_payload_type_;
// iSAC encoder/decoder state, guarded by a mutex to ensure that encode calls
// from one thread won't clash with decode calls from another thread.
// Note: PT_GUARDED_BY is disabled since it is not yet supported by clang.
const scoped_ptr<CriticalSectionWrapper> state_lock_;
// Codec instance handle; every T:: call on it in this class is made while
// holding state_lock_.
typename T::instance_type* isac_state_
GUARDED_BY(state_lock_) /* PT_GUARDED_BY(state_lock_)*/;
// Must be acquired before state_lock_.
const scoped_ptr<CriticalSectionWrapper> lock_;
typename T::instance_type* isac_state_ GUARDED_BY(lock_);
// Have we accepted input but not yet emitted it in a packet?
bool packet_in_progress_;
// Working on the very first output frame.
bool first_output_frame_;
bool packet_in_progress_ GUARDED_BY(lock_);
// Timestamp of the first input of the currently in-progress packet.
uint32_t packet_timestamp_;
uint32_t packet_timestamp_ GUARDED_BY(lock_);
// Timestamp of the previously encoded packet.
uint32_t last_encoded_timestamp_;
uint32_t last_encoded_timestamp_ GUARDED_BY(lock_);
// Redundant encoding from last time.
// Note: If has_redundant_encoder is false, we set the array length to 1,
// since zero-length arrays are not supported by all compilers.
uint8_t redundant_payload_[T::has_redundant_encoder
? kSufficientEncodeBufferSizeBytes
: 1] GUARDED_BY(lock_);
size_t redundant_length_bytes_ GUARDED_BY(lock_);
DISALLOW_COPY_AND_ASSIGN(AudioEncoderDecoderIsacT);
};

View File

@ -13,29 +13,49 @@
#include "webrtc/modules/audio_coding/codecs/isac/main/interface/audio_encoder_isac.h"
#include <algorithm>
#include "webrtc/base/checks.h"
#include "webrtc/modules/audio_coding/codecs/isac/main/interface/isac.h"
#include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
namespace webrtc {
const int kIsacPayloadType = 103;
const int kInvalidPayloadType = -1;
template <typename T>
AudioEncoderDecoderIsacT<T>::Config::Config()
: payload_type(kIsacPayloadType),
red_payload_type(kInvalidPayloadType),
sample_rate_hz(16000),
frame_size_ms(30),
bit_rate(32000) {
bit_rate(32000),
max_bit_rate(-1),
max_payload_size_bytes(-1) {
}
template <typename T>
bool AudioEncoderDecoderIsacT<T>::Config::IsOk() const {
if (max_bit_rate < 32000 && max_bit_rate != -1)
return false;
if (max_payload_size_bytes < 120 && max_payload_size_bytes != -1)
return false;
switch (sample_rate_hz) {
case 16000:
if (max_bit_rate > 53400)
return false;
if (max_payload_size_bytes > 400)
return false;
return (frame_size_ms == 30 || frame_size_ms == 60) &&
bit_rate >= 10000 && bit_rate <= 32000;
case 32000:
return T::has_32kHz &&
case 48000:
if (max_bit_rate > 160000)
return false;
if (max_payload_size_bytes > 600)
return false;
return T::has_swb &&
(frame_size_ms == 30 && bit_rate >= 10000 && bit_rate <= 56000);
default:
return false;
@ -45,41 +65,76 @@ bool AudioEncoderDecoderIsacT<T>::Config::IsOk() const {
template <typename T>
AudioEncoderDecoderIsacT<T>::ConfigAdaptive::ConfigAdaptive()
: payload_type(kIsacPayloadType),
red_payload_type(kInvalidPayloadType),
sample_rate_hz(16000),
initial_frame_size_ms(30),
initial_bit_rate(32000),
enforce_frame_size(false) {
max_bit_rate(-1),
enforce_frame_size(false),
max_payload_size_bytes(-1) {
}
template <typename T>
bool AudioEncoderDecoderIsacT<T>::ConfigAdaptive::IsOk() const {
static const int max_rate = T::has_32kHz ? 56000 : 32000;
return sample_rate_hz == 16000 &&
(initial_frame_size_ms == 30 || initial_frame_size_ms == 60) &&
initial_bit_rate >= 10000 && initial_bit_rate <= max_rate;
if (max_bit_rate < 32000 && max_bit_rate != -1)
return false;
if (max_payload_size_bytes < 120 && max_payload_size_bytes != -1)
return false;
switch (sample_rate_hz) {
case 16000:
if (max_bit_rate > 53400)
return false;
if (max_payload_size_bytes > 400)
return false;
return (initial_frame_size_ms == 30 || initial_frame_size_ms == 60) &&
initial_bit_rate >= 10000 && initial_bit_rate <= 32000;
case 32000:
case 48000:
if (max_bit_rate > 160000)
return false;
if (max_payload_size_bytes > 600)
return false;
return T::has_swb &&
(initial_frame_size_ms == 30 && initial_bit_rate >= 10000 &&
initial_bit_rate <= 56000);
default:
return false;
}
}
template <typename T>
AudioEncoderDecoderIsacT<T>::AudioEncoderDecoderIsacT(const Config& config)
: payload_type_(config.payload_type),
red_payload_type_(config.red_payload_type),
state_lock_(CriticalSectionWrapper::CreateCriticalSection()),
lock_(CriticalSectionWrapper::CreateCriticalSection()),
packet_in_progress_(false),
first_output_frame_(true) {
redundant_length_bytes_(0) {
CHECK(config.IsOk());
CHECK_EQ(0, T::Create(&isac_state_));
CHECK_EQ(0, T::EncoderInit(isac_state_, 1));
CHECK_EQ(0, T::SetEncSampRate(isac_state_, config.sample_rate_hz));
CHECK_EQ(0, T::Control(isac_state_, config.bit_rate, config.frame_size_ms));
CHECK_EQ(0, T::SetDecSampRate(isac_state_, config.sample_rate_hz));
// When config.sample_rate_hz is set to 48000 Hz (iSAC-fb), the decoder is
// still set to 32000 Hz, since there is no full-band mode in the decoder.
CHECK_EQ(0, T::SetDecSampRate(isac_state_,
std::min(config.sample_rate_hz, 32000)));
if (config.max_payload_size_bytes != -1)
CHECK_EQ(0,
T::SetMaxPayloadSize(isac_state_, config.max_payload_size_bytes));
if (config.max_bit_rate != -1)
CHECK_EQ(0, T::SetMaxRate(isac_state_, config.max_bit_rate));
}
template <typename T>
AudioEncoderDecoderIsacT<T>::AudioEncoderDecoderIsacT(
const ConfigAdaptive& config)
: payload_type_(config.payload_type),
red_payload_type_(config.red_payload_type),
state_lock_(CriticalSectionWrapper::CreateCriticalSection()),
lock_(CriticalSectionWrapper::CreateCriticalSection()),
packet_in_progress_(false),
first_output_frame_(true) {
redundant_length_bytes_(0) {
CHECK(config.IsOk());
CHECK_EQ(0, T::Create(&isac_state_));
CHECK_EQ(0, T::EncoderInit(isac_state_, 0));
@ -88,6 +143,11 @@ AudioEncoderDecoderIsacT<T>::AudioEncoderDecoderIsacT(
config.initial_frame_size_ms,
config.enforce_frame_size));
CHECK_EQ(0, T::SetDecSampRate(isac_state_, config.sample_rate_hz));
if (config.max_payload_size_bytes != -1)
CHECK_EQ(0,
T::SetMaxPayloadSize(isac_state_, config.max_payload_size_bytes));
if (config.max_bit_rate != -1)
CHECK_EQ(0, T::SetMaxRate(isac_state_, config.max_bit_rate));
}
template <typename T>
@ -95,9 +155,15 @@ AudioEncoderDecoderIsacT<T>::~AudioEncoderDecoderIsacT() {
CHECK_EQ(0, T::Free(isac_state_));
}
// Changes the decoder-side sample rate of the iSAC instance.
// Takes state_lock_ for the duration of the call and CHECK-fails (aborts) if
// T::SetDecSampRate() reports an error (non-zero return).
// NOTE(review): the Config constructor caps the decoder rate at 32000 Hz, but
// |sample_rate_hz| is passed through unmodified here -- presumably callers
// must supply a rate the decoder supports; confirm.
template <typename T>
void AudioEncoderDecoderIsacT<T>::UpdateDecoderSampleRate(int sample_rate_hz) {
CriticalSectionScoped cs(state_lock_.get());
CHECK_EQ(0, T::SetDecSampRate(isac_state_, sample_rate_hz));
}
template <typename T>
int AudioEncoderDecoderIsacT<T>::sample_rate_hz() const {
CriticalSectionScoped cs(lock_.get());
CriticalSectionScoped cs(state_lock_.get());
return T::EncSampRate(isac_state_);
}
@ -108,7 +174,7 @@ int AudioEncoderDecoderIsacT<T>::num_channels() const {
template <typename T>
int AudioEncoderDecoderIsacT<T>::Num10MsFramesInNextPacket() const {
CriticalSectionScoped cs(lock_.get());
CriticalSectionScoped cs(state_lock_.get());
const int samples_in_next_packet = T::GetNewFrameLen(isac_state_);
return rtc::CheckedDivExact(samples_in_next_packet,
rtc::CheckedDivExact(sample_rate_hz(), 100));
@ -125,6 +191,7 @@ bool AudioEncoderDecoderIsacT<T>::EncodeInternal(uint32_t rtp_timestamp,
size_t max_encoded_bytes,
uint8_t* encoded,
EncodedInfo* info) {
CriticalSectionScoped cs(lock_.get());
if (!packet_in_progress_) {
// Starting a new packet; remember the timestamp for later.
packet_in_progress_ = true;
@ -132,7 +199,7 @@ bool AudioEncoderDecoderIsacT<T>::EncodeInternal(uint32_t rtp_timestamp,
}
int r;
{
CriticalSectionScoped cs(lock_.get());
CriticalSectionScoped cs(state_lock_.get());
r = T::Encode(isac_state_, audio, encoded);
}
if (r < 0) {
@ -158,30 +225,40 @@ bool AudioEncoderDecoderIsacT<T>::EncodeInternal(uint32_t rtp_timestamp,
if (!T::has_redundant_encoder)
return true;
if (first_output_frame_) {
if (redundant_length_bytes_ == 0) {
// Do not emit the first output frame when using redundant encoding.
info->encoded_bytes = 0;
first_output_frame_ = false;
} else {
// Call the encoder's method to get redundant encoding.
// When a redundant payload from the last Encode call is available, the
// resulting payload consists of the primary encoding followed by the
// redundant encoding from last time.
const size_t primary_length = info->encoded_bytes;
int16_t secondary_len;
{
CriticalSectionScoped cs(lock_.get());
secondary_len = T::GetRedPayload(isac_state_, &encoded[primary_length]);
}
DCHECK_GE(secondary_len, 0);
memcpy(&encoded[primary_length], redundant_payload_,
redundant_length_bytes_);
// The EncodedInfo struct |info| will have one root node and two leaves.
// |info| will be implicitly cast to an EncodedInfoLeaf struct, effectively
// discarding the (empty) vector of redundant information. This is
// intentional.
info->redundant.push_back(*info);
EncodedInfoLeaf secondary_info;
secondary_info.payload_type = info->payload_type;
secondary_info.encoded_bytes = secondary_len;
secondary_info.encoded_bytes = redundant_length_bytes_;
secondary_info.encoded_timestamp = last_encoded_timestamp_;
info->redundant.push_back(secondary_info);
info->encoded_bytes += secondary_len; // Sum of primary and secondary.
info->encoded_bytes +=
redundant_length_bytes_; // Sum of primary and secondary.
DCHECK_NE(red_payload_type_, kInvalidPayloadType)
<< "Config.red_payload_type must be set for "
"AudioEncoderDecoderIsacRed.";
info->payload_type = red_payload_type_;
}
{
CriticalSectionScoped cs(state_lock_.get());
// Call the encoder's method to get redundant encoding.
redundant_length_bytes_ = T::GetRedPayload(isac_state_, redundant_payload_);
}
DCHECK_LE(redundant_length_bytes_, sizeof(redundant_payload_));
DCHECK_GE(redundant_length_bytes_, 0u);
last_encoded_timestamp_ = packet_timestamp_;
return true;
}
@ -191,7 +268,7 @@ int AudioEncoderDecoderIsacT<T>::Decode(const uint8_t* encoded,
size_t encoded_len,
int16_t* decoded,
SpeechType* speech_type) {
CriticalSectionScoped cs(lock_.get());
CriticalSectionScoped cs(state_lock_.get());
int16_t temp_type = 1; // Default is speech.
int16_t ret =
T::Decode(isac_state_, encoded, static_cast<int16_t>(encoded_len),
@ -205,7 +282,7 @@ int AudioEncoderDecoderIsacT<T>::DecodeRedundant(const uint8_t* encoded,
size_t encoded_len,
int16_t* decoded,
SpeechType* speech_type) {
CriticalSectionScoped cs(lock_.get());
CriticalSectionScoped cs(state_lock_.get());
int16_t temp_type = 1; // Default is speech.
int16_t ret =
T::DecodeRcu(isac_state_, encoded, static_cast<int16_t>(encoded_len),
@ -221,13 +298,13 @@ bool AudioEncoderDecoderIsacT<T>::HasDecodePlc() const {
template <typename T>
int AudioEncoderDecoderIsacT<T>::DecodePlc(int num_frames, int16_t* decoded) {
CriticalSectionScoped cs(lock_.get());
CriticalSectionScoped cs(state_lock_.get());
return T::DecodePlc(isac_state_, decoded, num_frames);
}
template <typename T>
int AudioEncoderDecoderIsacT<T>::Init() {
CriticalSectionScoped cs(lock_.get());
CriticalSectionScoped cs(state_lock_.get());
return T::DecoderInit(isac_state_);
}
@ -237,7 +314,7 @@ int AudioEncoderDecoderIsacT<T>::IncomingPacket(const uint8_t* payload,
uint16_t rtp_sequence_number,
uint32_t rtp_timestamp,
uint32_t arrival_timestamp) {
CriticalSectionScoped cs(lock_.get());
CriticalSectionScoped cs(state_lock_.get());
return T::UpdateBwEstimate(
isac_state_, payload, static_cast<int32_t>(payload_len),
rtp_sequence_number, rtp_timestamp, arrival_timestamp);
@ -245,7 +322,7 @@ int AudioEncoderDecoderIsacT<T>::IncomingPacket(const uint8_t* payload,
template <typename T>
int AudioEncoderDecoderIsacT<T>::ErrorCode() {
CriticalSectionScoped cs(lock_.get());
CriticalSectionScoped cs(state_lock_.get());
return T::GetErrorCode(isac_state_);
}

View File

@ -18,7 +18,7 @@ namespace webrtc {
struct IsacFix {
typedef ISACFIX_MainStruct instance_type;
static const bool has_32kHz = false;
static const bool has_swb = false;
static const bool has_redundant_encoder = false;
static const uint16_t kFixSampleRate = 16000;
static inline int16_t Control(instance_type* inst,
@ -105,6 +105,13 @@ struct IsacFix {
FATAL() << "Should never be called.";
return -1;
}
// Forwards the payload-size cap to WebRtcIsacfix_SetMaxPayloadSize() and
// returns its status unchanged. The AudioEncoderDecoderIsacT constructors call
// this only when config.max_payload_size_bytes != -1 and CHECK for a 0 return.
// The config field is an int; IsOk() bounds it to at most 600, so it fits in
// int16_t.
static inline int16_t SetMaxPayloadSize(instance_type* inst,
int16_t max_payload_size_bytes) {
return WebRtcIsacfix_SetMaxPayloadSize(inst, max_payload_size_bytes);
}
// Forwards the bit-rate cap to WebRtcIsacfix_SetMaxRate() and returns its
// status unchanged. The AudioEncoderDecoderIsacT constructors call this only
// when config.max_bit_rate != -1 and CHECK for a 0 return.
static inline int16_t SetMaxRate(instance_type* inst, int32_t max_bit_rate) {
return WebRtcIsacfix_SetMaxRate(inst, max_bit_rate);
}
};
typedef AudioEncoderDecoderIsacT<IsacFix> AudioEncoderDecoderIsacFix;

View File

@ -18,7 +18,7 @@ namespace webrtc {
struct IsacFloat {
typedef ISACStruct instance_type;
static const bool has_32kHz = true;
static const bool has_swb = true;
static const bool has_redundant_encoder = false;
static inline int16_t Control(instance_type* inst,
int32_t rate,
@ -102,6 +102,13 @@ struct IsacFloat {
FATAL() << "Should never be called.";
return -1;
}
// Forwards the payload-size cap to WebRtcIsac_SetMaxPayloadSize() and returns
// its status unchanged. The AudioEncoderDecoderIsacT constructors call this
// only when config.max_payload_size_bytes != -1 and CHECK for a 0 return.
// The config field is an int; IsOk() bounds it to at most 600, so it fits in
// int16_t.
static inline int16_t SetMaxPayloadSize(instance_type* inst,
int16_t max_payload_size_bytes) {
return WebRtcIsac_SetMaxPayloadSize(inst, max_payload_size_bytes);
}
// Forwards the bit-rate cap to WebRtcIsac_SetMaxRate() and returns its status
// unchanged. The AudioEncoderDecoderIsacT constructors call this only when
// config.max_bit_rate != -1 and CHECK for a 0 return.
static inline int16_t SetMaxRate(instance_type* inst, int32_t max_bit_rate) {
return WebRtcIsac_SetMaxRate(inst, max_bit_rate);
}
};
typedef AudioEncoderDecoderIsacT<IsacFloat> AudioEncoderDecoderIsac;

View File

@ -22,6 +22,7 @@ namespace webrtc {
TEST(AudioEncoderIsacRedTest, CompareRedAndNoRed) {
static const int kSampleRateHz = 16000;
static const int k10MsSamples = kSampleRateHz / 100;
static const int kRedPayloadType = 100;
// Fill the input array with pseudo-random noise in the range [-1000, 1000].
int16_t input[k10MsSamples];
srand(1418811752);
@ -37,6 +38,9 @@ TEST(AudioEncoderIsacRedTest, CompareRedAndNoRed) {
AudioEncoderDecoderIsac isac_encoder(config);
AudioEncoderDecoderIsacRed::Config red_config;
red_config.sample_rate_hz = kSampleRateHz;
red_config.red_payload_type = kRedPayloadType;
ASSERT_NE(red_config.red_payload_type, red_config.payload_type)
<< "iSAC and RED payload types must be different.";
AudioEncoderDecoderIsacRed isac_red_encoder(red_config);
AudioEncoder::EncodedInfo info, red_info;