Files
webrtc/webrtc/modules/audio_coding/main/acm2/acm_generic_codec.h
minyue@webrtc.org e16bfde512 Adding flag to force Opus application and DTX while toggling.
Currently, we only allow Opus DTX in combination with Opus kVoip mode. When one of them is toggled, the other might need to change as well. This CL is to introduce a flag to force a co-config.

BUG=
R=henrik.lundin@webrtc.org, tina.legrand@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/40159004

Cr-Commit-Position: refs/heads/master@{#8698}
git-svn-id: http://webrtc.googlecode.com/svn/trunk@8698 4adac7df-926f-26a2-2b94-8c16560cd09d
2015-03-12 15:29:23 +00:00

492 lines
19 KiB
C++

/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_GENERIC_CODEC_H_
#define WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_GENERIC_CODEC_H_
#include <map>
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/base/thread_annotations.h"
#include "webrtc/modules/audio_coding/main/interface/audio_coding_module_typedefs.h"
#include "webrtc/modules/audio_coding/codecs/audio_decoder.h"
#include "webrtc/modules/audio_coding/codecs/audio_encoder.h"
#include "webrtc/modules/audio_coding/main/acm2/acm_common_defs.h"
#include "webrtc/modules/audio_coding/neteq/interface/neteq.h"
#include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
#include "webrtc/system_wrappers/interface/rw_lock_wrapper.h"
#include "webrtc/system_wrappers/interface/trace.h"
#define MAX_FRAME_SIZE_10MSEC 6
// forward declaration
struct WebRtcVadInst;
struct WebRtcCngEncInst;
namespace webrtc {
struct WebRtcACMCodecParams;
struct CodecInst;
namespace acm2 {
// forward declaration
class AcmReceiver;
// Proxy for AudioDecoder
class AudioDecoderProxy final : public AudioDecoder {
public:
AudioDecoderProxy();
void SetDecoder(AudioDecoder* decoder);
bool IsSet() const;
int Decode(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) override;
int DecodeRedundant(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type) override;
bool HasDecodePlc() const override;
int DecodePlc(int num_frames, int16_t* decoded) override;
int Init() override;
int IncomingPacket(const uint8_t* payload,
size_t payload_len,
uint16_t rtp_sequence_number,
uint32_t rtp_timestamp,
uint32_t arrival_timestamp) override;
int ErrorCode() override;
int PacketDuration(const uint8_t* encoded, size_t encoded_len) const override;
int PacketDurationRedundant(const uint8_t* encoded,
size_t encoded_len) const override;
bool PacketHasFec(const uint8_t* encoded, size_t encoded_len) const override;
CNG_dec_inst* CngDecoderInstance() override;
private:
rtc::scoped_ptr<CriticalSectionWrapper> decoder_lock_;
AudioDecoder* decoder_ GUARDED_BY(decoder_lock_);
};
class ACMGenericCodec {
public:
ACMGenericCodec(const CodecInst& codec_inst,
int cng_pt_nb,
int cng_pt_wb,
int cng_pt_swb,
int cng_pt_fb,
bool enable_red,
int red_payload_type);
~ACMGenericCodec();
///////////////////////////////////////////////////////////////////////////
// ACMGenericCodec* CreateInstance();
// The function will be used for FEC. It is not implemented yet.
//
ACMGenericCodec* CreateInstance();
///////////////////////////////////////////////////////////////////////////
// int16_t Encode()
// The function is called to perform an encoding of the audio stored in
// audio buffer. An encoding is performed only if enough audio, i.e. equal
// to the frame-size of the codec, exist. The audio frame will be processed
// by VAD and CN/DTX if required. There are few different cases.
//
// A) Neither VAD nor DTX is active; the frame is encoded by the encoder.
//
// B) VAD is enabled but not DTX; in this case the audio is processed by VAD
// and encoded by the encoder. The "*encoding_type" will be either
// "kActiveNormalEncode" or "kPassiveNormalEncode" if frame is active or
// passive, respectively.
//
// C) DTX is enabled; if the codec has internal VAD/DTX we just encode the
// frame by the encoder. Otherwise, the frame is passed through VAD and
// if identified as passive, then it will be processed by CN/DTX. If the
// frame is active it will be encoded by the encoder.
//
// This function acquires the appropriate locks and calls EncodeSafe() for
// the actual processing.
//
// Outputs:
// -bitstream : a buffer where bit-stream will be written to.
// -bitstream_len_byte : contains the length of the bit-stream in
// bytes.
// -timestamp : contains the RTP timestamp, this is the
// sampling time of the first sample encoded
// (measured in number of samples).
//
//
void Encode(uint32_t input_timestamp,
const int16_t* audio,
uint16_t length_per_channel,
uint8_t audio_channel,
uint8_t* bitstream,
int16_t* bitstream_len_byte,
AudioEncoder::EncodedInfo* encoded_info);
///////////////////////////////////////////////////////////////////////////
// bool EncoderInitialized();
//
// Return value:
// True if the encoder is successfully initialized,
// false otherwise.
//
bool EncoderInitialized();
///////////////////////////////////////////////////////////////////////////
// int16_t EncoderParams()
// It is called to get encoder parameters. It will call
// EncoderParamsSafe() in turn.
//
// Output:
// -enc_params : a buffer where the encoder parameters is
// written to. If the encoder is not
// initialized this buffer is filled with
// invalid values
// Return value:
// -1 if the encoder is not initialized,
// 0 otherwise.
//
int16_t EncoderParams(WebRtcACMCodecParams* enc_params);
///////////////////////////////////////////////////////////////////////////
// int16_t InitEncoder(...)
// This function is called to initialize the encoder with the given
// parameters.
//
// Input:
// -codec_params : parameters of encoder.
// -force_initialization: if false the initialization is invoked only if
// the encoder is not initialized. If true the
// encoder is forced to (re)initialize.
//
// Return value:
// 0 if could initialize successfully,
// -1 if failed to initialize.
//
//
int16_t InitEncoder(WebRtcACMCodecParams* codec_params,
bool force_initialization);
///////////////////////////////////////////////////////////////////////////
// uint32_t NoMissedSamples()
// This function returns the number of samples which are overwritten in
// the audio buffer. The audio samples are overwritten if the input audio
// buffer is full, but Add10MsData() is called. (We might remove this
// function if it is not used)
//
// Return Value:
// Number of samples which are overwritten.
//
uint32_t NoMissedSamples() const;
///////////////////////////////////////////////////////////////////////////
// void ResetNoMissedSamples()
// This function resets the number of overwritten samples to zero.
// (We might remove this function if we remove NoMissedSamples())
//
void ResetNoMissedSamples();
///////////////////////////////////////////////////////////////////////////
// int16_t SetBitRate()
// The function is called to set the encoding rate.
//
// Input:
// -bitrate_bps : encoding rate in bits per second
//
// Return value:
// -1 if failed to set the rate, due to invalid input or given
// codec is not rate-adjustable.
// 0 if the rate is adjusted successfully
//
int16_t SetBitRate(const int32_t bitrate_bps);
///////////////////////////////////////////////////////////////////////////
// uint32_t EarliestTimestamp()
// Returns the timestamp of the first 10 ms in audio buffer. This is used
// to identify if a synchronization of two encoders is required.
//
// Return value:
// timestamp of the first 10 ms audio in the audio buffer.
//
uint32_t EarliestTimestamp() const;
///////////////////////////////////////////////////////////////////////////
// int16_t SetVAD()
// This is called to set VAD & DTX. If the codec has internal DTX, it will
// be used. If DTX is enabled and the codec does not have internal DTX,
// WebRtc-VAD will be used to decide if the frame is active. If DTX is
// disabled but VAD is enabled, the audio is passed through VAD to label it
// as active or passive, but the frame is encoded normally. However the
// bit-stream is labeled properly so that ACM::Process() can use this
// information. In case of failure, the previous states of the VAD & DTX
// are kept.
//
// Inputs/Output:
// -enable_dtx : if true DTX will be enabled otherwise the DTX is
// disabled. If codec has internal DTX that will be
// used, otherwise WebRtc-CNG is used. In the latter
// case VAD is automatically activated.
// -enable_vad : if true WebRtc-VAD is enabled, otherwise VAD is
// disabled, except for the case that DTX is enabled
// but codec doesn't have internal DTX. In this case
// VAD is enabled regardless of the value of
// |enable_vad|.
// -mode : this specifies the aggressiveness of VAD.
//
// Return value
// -1 if failed to set DTX & VAD as specified,
// 0 if succeeded.
//
int16_t SetVAD(bool* enable_dtx, bool* enable_vad, ACMVADMode* mode);
// Registers comfort noise at |sample_rate_hz| to use |payload_type|.
void SetCngPt(int sample_rate_hz, int payload_type);
///////////////////////////////////////////////////////////////////////////
// UpdateEncoderSampFreq()
// Call this function to update the encoder sampling frequency. This
// is for codecs where one payload-name supports several encoder sampling
// frequencies. Otherwise, to change the sampling frequency we need to
// register new codec. ACM will consider that as registration of a new
// codec, not a change in parameter. For iSAC, switching from WB to SWB
// is treated as a change in parameter. Therefore, we need this function.
//
// Input:
// -samp_freq_hz : encoder sampling frequency.
//
// Return value:
// -1 if failed, or if this is meaningless for the given codec.
// 0 if succeeded.
//
int16_t UpdateEncoderSampFreq(uint16_t samp_freq_hz)
EXCLUSIVE_LOCKS_REQUIRED(codec_wrapper_lock_);
///////////////////////////////////////////////////////////////////////////
// EncoderSampFreq()
// Get the sampling frequency that the encoder (WebRtc wrapper) expects.
//
// Output:
// -samp_freq_hz : sampling frequency, in Hertz, which the encoder
// should be fed with.
//
// Return value:
// -1 if failed to output sampling rate.
// 0 if the sample rate is returned successfully.
//
int16_t EncoderSampFreq(uint16_t* samp_freq_hz)
SHARED_LOCKS_REQUIRED(codec_wrapper_lock_);
///////////////////////////////////////////////////////////////////////////
// SetISACMaxPayloadSize()
// Set the maximum payload size of iSAC packets. No iSAC payload,
// regardless of its frame-size, may exceed the given limit. For
// an iSAC payload of size B bits and frame-size T sec we have;
// (B < max_payload_len_bytes * 8) and (B/T < max_rate_bit_per_sec), c.f.
// SetISACMaxRate().
//
// Input:
// -max_payload_len_bytes : maximum payload size in bytes.
//
// Return value:
// -1 if failed to set the maximum payload-size.
// 0 if the given length is set successfully.
//
int32_t SetISACMaxPayloadSize(const uint16_t max_payload_len_bytes);
///////////////////////////////////////////////////////////////////////////
// SetISACMaxRate()
// Set the maximum instantaneous rate of iSAC. For a payload of B bits
// with a frame-size of T sec the instantaneous rate is B/T bits per
// second. Therefore, (B/T < max_rate_bit_per_sec) and
// (B < max_payload_len_bytes * 8) are always satisfied for iSAC payloads,
// c.f SetISACMaxPayloadSize().
//
// Input:
// -max_rate_bps : maximum instantaneous bit-rate given in bits/sec.
//
// Return value:
// -1 if failed to set the maximum rate.
// 0 if the maximum rate is set successfully.
//
int32_t SetISACMaxRate(const uint32_t max_rate_bps);
///////////////////////////////////////////////////////////////////////////
// int SetOpusApplication(OpusApplicationMode application,
// bool disable_dtx_if_needed)
// Sets the intended application for the Opus encoder. Opus uses this to
// optimize the encoding for applications like VOIP and music. Currently, two
// modes are supported: kVoip and kAudio. kAudio is only allowed when Opus
// DTX is switched off. If DTX is on, and |application| == kAudio, a failure
// will be triggered unless |disable_dtx_if_needed| == true, for which, the
// DTX will be forced off.
//
// Input:
// - application : intended application.
// - disable_dtx_if_needed : whether to force Opus DTX to stop when needed.
//
// Return value:
// -1 if failed or on codecs other than Opus.
// 0 if succeeded.
//
int SetOpusApplication(OpusApplicationMode application,
bool disable_dtx_if_needed);
///////////////////////////////////////////////////////////////////////////
// int SetOpusMaxPlaybackRate()
// Sets maximum playback rate the receiver will render, if the codec is Opus.
// This is to tell Opus that it is enough to code the input audio up to a
// bandwidth. Opus can take this information to optimize the bit rate and
// increase the computation efficiency.
//
// Input:
// -frequency_hz : maximum playback rate in Hz.
//
// Return value:
// -1 if failed or on codecs other than Opus.
// 0 if succeeded.
//
int SetOpusMaxPlaybackRate(int /* frequency_hz */);
///////////////////////////////////////////////////////////////////////////
// EnableOpusDtx(bool force_voip)
// Enable the DTX, if the codec is Opus. Currently, DTX can only be enabled
// when the application mode is kVoip. If |force_voip| == true, the
// application mode will be forced to kVoip. Otherwise, a failure will be
// triggered if current application mode is kAudio.
// Input:
// - force_voip : whether to force application mode to kVoip.
// Return value:
// -1 if failed or on codecs other than Opus.
// 0 if succeeded.
//
int EnableOpusDtx(bool force_voip);
///////////////////////////////////////////////////////////////////////////
// DisbleOpusDtx()
// Disable the DTX, if the codec is Opus.
// Return value:
// -1 if failed or on codecs other than Opus.
// 0 if succeeded.
//
int DisableOpusDtx();
///////////////////////////////////////////////////////////////////////////
// HasFrameToEncode()
// Returns true if there is enough audio buffered for encoding, such that
// calling Encode() will return a payload.
//
bool HasFrameToEncode() const;
// Returns a pointer to the AudioDecoder part of a joint encoder-decoder
// object, if it exists. Otherwise, nullptr is returned.
AudioDecoder* Decoder();
///////////////////////////////////////////////////////////////////////////
// bool HasInternalFEC()
// Used to check if the codec has internal FEC.
//
// Return value:
// true if the codec has an internal FEC, e.g. Opus.
// false otherwise.
//
bool HasInternalFEC() const {
ReadLockScoped rl(codec_wrapper_lock_);
return has_internal_fec_;
}
///////////////////////////////////////////////////////////////////////////
// int SetFEC();
// Sets the codec internal FEC. No effects on codecs that do not provide
// internal FEC.
//
// Input:
// -enable_fec : if true FEC will be enabled otherwise the FEC is
// disabled.
//
// Return value:
// -1 if failed,
// 0 if succeeded.
//
int SetFEC(bool enable_fec);
///////////////////////////////////////////////////////////////////////////
// int SetPacketLossRate()
// Sets expected packet loss rate for encoding. Some encoders provide packet
// loss gnostic encoding to make stream less sensitive to packet losses,
// through e.g., FEC. No effects on codecs that do not provide such encoding.
//
// Input:
// -loss_rate : expected packet loss rate (0 -- 100 inclusive).
//
// Return value:
// -1 if failed,
// 0 if succeeded or packet loss rate is ignored.
//
int SetPacketLossRate(int /* loss_rate */);
// Sets if CopyRed should be enabled.
void EnableCopyRed(bool enable, int red_payload_type);
// This method is only for testing.
const AudioEncoder* GetAudioEncoder() const;
private:
bool has_internal_fec_ GUARDED_BY(codec_wrapper_lock_);
bool copy_red_enabled_ GUARDED_BY(codec_wrapper_lock_);
WebRtcACMCodecParams encoder_params_ GUARDED_BY(codec_wrapper_lock_);
// Used to lock wrapper internal data
// such as buffers and state variables.
RWLockWrapper& codec_wrapper_lock_;
uint32_t last_timestamp_ GUARDED_BY(codec_wrapper_lock_);
uint32_t unique_id_;
void ResetAudioEncoder() EXCLUSIVE_LOCKS_REQUIRED(codec_wrapper_lock_);
OpusApplicationMode GetOpusApplication(int num_channels,
bool enable_dtx) const
EXCLUSIVE_LOCKS_REQUIRED(codec_wrapper_lock_);
rtc::scoped_ptr<AudioEncoder> audio_encoder_ GUARDED_BY(codec_wrapper_lock_);
rtc::scoped_ptr<AudioEncoder> cng_encoder_ GUARDED_BY(codec_wrapper_lock_);
rtc::scoped_ptr<AudioEncoder> red_encoder_ GUARDED_BY(codec_wrapper_lock_);
AudioEncoder* encoder_ GUARDED_BY(codec_wrapper_lock_);
AudioDecoderProxy decoder_proxy_ GUARDED_BY(codec_wrapper_lock_);
WebRtcACMCodecParams acm_codec_params_ GUARDED_BY(codec_wrapper_lock_);
int bitrate_bps_ GUARDED_BY(codec_wrapper_lock_);
bool fec_enabled_ GUARDED_BY(codec_wrapper_lock_);
int loss_rate_ GUARDED_BY(codec_wrapper_lock_);
int max_playback_rate_hz_ GUARDED_BY(codec_wrapper_lock_);
int max_payload_size_bytes_ GUARDED_BY(codec_wrapper_lock_);
int max_rate_bps_ GUARDED_BY(codec_wrapper_lock_);
bool opus_dtx_enabled_ GUARDED_BY(codec_wrapper_lock_);
bool is_opus_ GUARDED_BY(codec_wrapper_lock_);
bool is_isac_ GUARDED_BY(codec_wrapper_lock_);
bool first_frame_ GUARDED_BY(codec_wrapper_lock_);
uint32_t rtp_timestamp_ GUARDED_BY(codec_wrapper_lock_);
uint32_t last_rtp_timestamp_ GUARDED_BY(codec_wrapper_lock_);
// Map from payload type to CNG sample rate (Hz).
std::map<int, int> cng_pt_ GUARDED_BY(codec_wrapper_lock_);
int red_payload_type_ GUARDED_BY(codec_wrapper_lock_);
OpusApplicationMode opus_application_ GUARDED_BY(codec_wrapper_lock_);
bool opus_application_set_ GUARDED_BY(codec_wrapper_lock_);
};
} // namespace acm2
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_GENERIC_CODEC_H_