/* * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_GENERIC_CODEC_H_ #define WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_GENERIC_CODEC_H_ #include #include "webrtc/base/scoped_ptr.h" #include "webrtc/base/thread_annotations.h" #include "webrtc/modules/audio_coding/main/interface/audio_coding_module_typedefs.h" #include "webrtc/modules/audio_coding/codecs/audio_decoder.h" #include "webrtc/modules/audio_coding/codecs/audio_encoder.h" #include "webrtc/modules/audio_coding/main/acm2/acm_common_defs.h" #include "webrtc/modules/audio_coding/neteq/interface/neteq.h" #include "webrtc/system_wrappers/interface/critical_section_wrapper.h" #include "webrtc/system_wrappers/interface/rw_lock_wrapper.h" #include "webrtc/system_wrappers/interface/trace.h" #define MAX_FRAME_SIZE_10MSEC 6 // forward declaration struct WebRtcVadInst; struct WebRtcCngEncInst; namespace webrtc { struct WebRtcACMCodecParams; struct CodecInst; namespace acm2 { // forward declaration class AcmReceiver; // Proxy for AudioDecoder class AudioDecoderProxy final : public AudioDecoder { public: AudioDecoderProxy(); void SetDecoder(AudioDecoder* decoder); bool IsSet() const; int Decode(const uint8_t* encoded, size_t encoded_len, int sample_rate_hz, int16_t* decoded, SpeechType* speech_type) override; int DecodeRedundant(const uint8_t* encoded, size_t encoded_len, int sample_rate_hz, int16_t* decoded, SpeechType* speech_type) override; bool HasDecodePlc() const override; int DecodePlc(int num_frames, int16_t* decoded) override; int Init() override; int IncomingPacket(const uint8_t* payload, size_t payload_len, uint16_t rtp_sequence_number, uint32_t rtp_timestamp, uint32_t arrival_timestamp) override; int ErrorCode() override; int PacketDuration(const uint8_t* encoded, size_t encoded_len) const override; int PacketDurationRedundant(const uint8_t* encoded, size_t encoded_len) const override; bool PacketHasFec(const uint8_t* encoded, size_t encoded_len) const override; CNG_dec_inst* CngDecoderInstance() override; private: rtc::scoped_ptr decoder_lock_; AudioDecoder* decoder_ GUARDED_BY(decoder_lock_); }; class ACMGenericCodec { public: ACMGenericCodec(const CodecInst& codec_inst, int cng_pt_nb, int cng_pt_wb, int cng_pt_swb, int cng_pt_fb, bool enable_red, int red_payload_type); ~ACMGenericCodec(); /////////////////////////////////////////////////////////////////////////// // ACMGenericCodec* CreateInstance(); // The function will be used for FEC. It is not implemented yet. // ACMGenericCodec* CreateInstance(); /////////////////////////////////////////////////////////////////////////// // int16_t Encode() // The function is called to perform an encoding of the audio stored in // audio buffer. An encoding is performed only if enough audio, i.e. equal // to the frame-size of the codec, exist. The audio frame will be processed // by VAD and CN/DTX if required. There are few different cases. // // A) Neither VAD nor DTX is active; the frame is encoded by the encoder. // // B) VAD is enabled but not DTX; in this case the audio is processed by VAD // and encoded by the encoder. The "*encoding_type" will be either // "kActiveNormalEncode" or "kPassiveNormalEncode" if frame is active or // passive, respectively. // // C) DTX is enabled; if the codec has internal VAD/DTX we just encode the // frame by the encoder. Otherwise, the frame is passed through VAD and // if identified as passive, then it will be processed by CN/DTX. If the // frame is active it will be encoded by the encoder. // // This function acquires the appropriate locks and calls EncodeSafe() for // the actual processing. // // Outputs: // -bitstream : a buffer where bit-stream will be written to. // -bitstream_len_byte : contains the length of the bit-stream in // bytes. // -timestamp : contains the RTP timestamp, this is the // sampling time of the first sample encoded // (measured in number of samples). // // void Encode(uint32_t input_timestamp, const int16_t* audio, uint16_t length_per_channel, uint8_t audio_channel, uint8_t* bitstream, int16_t* bitstream_len_byte, AudioEncoder::EncodedInfo* encoded_info); /////////////////////////////////////////////////////////////////////////// // bool EncoderInitialized(); // // Return value: // True if the encoder is successfully initialized, // false otherwise. // bool EncoderInitialized(); /////////////////////////////////////////////////////////////////////////// // int16_t EncoderParams() // It is called to get encoder parameters. It will call // EncoderParamsSafe() in turn. // // Output: // -enc_params : a buffer where the encoder parameters is // written to. If the encoder is not // initialized this buffer is filled with // invalid values // Return value: // -1 if the encoder is not initialized, // 0 otherwise. // int16_t EncoderParams(WebRtcACMCodecParams* enc_params); /////////////////////////////////////////////////////////////////////////// // int16_t InitEncoder(...) // This function is called to initialize the encoder with the given // parameters. // // Input: // -codec_params : parameters of encoder. // -force_initialization: if false the initialization is invoked only if // the encoder is not initialized. If true the // encoder is forced to (re)initialize. // // Return value: // 0 if could initialize successfully, // -1 if failed to initialize. // // int16_t InitEncoder(WebRtcACMCodecParams* codec_params, bool force_initialization); /////////////////////////////////////////////////////////////////////////// // uint32_t NoMissedSamples() // This function returns the number of samples which are overwritten in // the audio buffer. The audio samples are overwritten if the input audio // buffer is full, but Add10MsData() is called. (We might remove this // function if it is not used) // // Return Value: // Number of samples which are overwritten. // uint32_t NoMissedSamples() const; /////////////////////////////////////////////////////////////////////////// // void ResetNoMissedSamples() // This function resets the number of overwritten samples to zero. // (We might remove this function if we remove NoMissedSamples()) // void ResetNoMissedSamples(); /////////////////////////////////////////////////////////////////////////// // int16_t SetBitRate() // The function is called to set the encoding rate. // // Input: // -bitrate_bps : encoding rate in bits per second // // Return value: // -1 if failed to set the rate, due to invalid input or given // codec is not rate-adjustable. // 0 if the rate is adjusted successfully // int16_t SetBitRate(const int32_t bitrate_bps); /////////////////////////////////////////////////////////////////////////// // uint32_t EarliestTimestamp() // Returns the timestamp of the first 10 ms in audio buffer. This is used // to identify if a synchronization of two encoders is required. // // Return value: // timestamp of the first 10 ms audio in the audio buffer. // uint32_t EarliestTimestamp() const; /////////////////////////////////////////////////////////////////////////// // int16_t SetVAD() // This is called to set VAD & DTX. If the codec has internal DTX, it will // be used. If DTX is enabled and the codec does not have internal DTX, // WebRtc-VAD will be used to decide if the frame is active. If DTX is // disabled but VAD is enabled, the audio is passed through VAD to label it // as active or passive, but the frame is encoded normally. However the // bit-stream is labeled properly so that ACM::Process() can use this // information. In case of failure, the previous states of the VAD & DTX // are kept. // // Inputs/Output: // -enable_dtx : if true DTX will be enabled otherwise the DTX is // disabled. If codec has internal DTX that will be // used, otherwise WebRtc-CNG is used. In the latter // case VAD is automatically activated. // -enable_vad : if true WebRtc-VAD is enabled, otherwise VAD is // disabled, except for the case that DTX is enabled // but codec doesn't have internal DTX. In this case // VAD is enabled regardless of the value of // |enable_vad|. // -mode : this specifies the aggressiveness of VAD. // // Return value // -1 if failed to set DTX & VAD as specified, // 0 if succeeded. // int16_t SetVAD(bool* enable_dtx, bool* enable_vad, ACMVADMode* mode); // Registers comfort noise at |sample_rate_hz| to use |payload_type|. void SetCngPt(int sample_rate_hz, int payload_type); /////////////////////////////////////////////////////////////////////////// // UpdateEncoderSampFreq() // Call this function to update the encoder sampling frequency. This // is for codecs where one payload-name supports several encoder sampling // frequencies. Otherwise, to change the sampling frequency we need to // register new codec. ACM will consider that as registration of a new // codec, not a change in parameter. For iSAC, switching from WB to SWB // is treated as a change in parameter. Therefore, we need this function. // // Input: // -samp_freq_hz : encoder sampling frequency. // // Return value: // -1 if failed, or if this is meaningless for the given codec. // 0 if succeeded. // int16_t UpdateEncoderSampFreq(uint16_t samp_freq_hz) EXCLUSIVE_LOCKS_REQUIRED(codec_wrapper_lock_); /////////////////////////////////////////////////////////////////////////// // EncoderSampFreq() // Get the sampling frequency that the encoder (WebRtc wrapper) expects. // // Output: // -samp_freq_hz : sampling frequency, in Hertz, which the encoder // should be fed with. // // Return value: // -1 if failed to output sampling rate. // 0 if the sample rate is returned successfully. // int16_t EncoderSampFreq(uint16_t* samp_freq_hz) SHARED_LOCKS_REQUIRED(codec_wrapper_lock_); /////////////////////////////////////////////////////////////////////////// // SetISACMaxPayloadSize() // Set the maximum payload size of iSAC packets. No iSAC payload, // regardless of its frame-size, may exceed the given limit. For // an iSAC payload of size B bits and frame-size T sec we have; // (B < max_payload_len_bytes * 8) and (B/T < max_rate_bit_per_sec), c.f. // SetISACMaxRate(). // // Input: // -max_payload_len_bytes : maximum payload size in bytes. // // Return value: // -1 if failed to set the maximum payload-size. // 0 if the given length is set successfully. // int32_t SetISACMaxPayloadSize(const uint16_t max_payload_len_bytes); /////////////////////////////////////////////////////////////////////////// // SetISACMaxRate() // Set the maximum instantaneous rate of iSAC. For a payload of B bits // with a frame-size of T sec the instantaneous rate is B/T bits per // second. Therefore, (B/T < max_rate_bit_per_sec) and // (B < max_payload_len_bytes * 8) are always satisfied for iSAC payloads, // c.f SetISACMaxPayloadSize(). // // Input: // -max_rate_bps : maximum instantaneous bit-rate given in bits/sec. // // Return value: // -1 if failed to set the maximum rate. // 0 if the maximum rate is set successfully. // int32_t SetISACMaxRate(const uint32_t max_rate_bps); /////////////////////////////////////////////////////////////////////////// // int SetOpusApplication(OpusApplicationMode application, // bool disable_dtx_if_needed) // Sets the intended application for the Opus encoder. Opus uses this to // optimize the encoding for applications like VOIP and music. Currently, two // modes are supported: kVoip and kAudio. kAudio is only allowed when Opus // DTX is switched off. If DTX is on, and |application| == kAudio, a failure // will be triggered unless |disable_dtx_if_needed| == true, for which, the // DTX will be forced off. // // Input: // - application : intended application. // - disable_dtx_if_needed : whether to force Opus DTX to stop when needed. // // Return value: // -1 if failed or on codecs other than Opus. // 0 if succeeded. // int SetOpusApplication(OpusApplicationMode application, bool disable_dtx_if_needed); /////////////////////////////////////////////////////////////////////////// // int SetOpusMaxPlaybackRate() // Sets maximum playback rate the receiver will render, if the codec is Opus. // This is to tell Opus that it is enough to code the input audio up to a // bandwidth. Opus can take this information to optimize the bit rate and // increase the computation efficiency. // // Input: // -frequency_hz : maximum playback rate in Hz. // // Return value: // -1 if failed or on codecs other than Opus. // 0 if succeeded. // int SetOpusMaxPlaybackRate(int /* frequency_hz */); /////////////////////////////////////////////////////////////////////////// // EnableOpusDtx(bool force_voip) // Enable the DTX, if the codec is Opus. Currently, DTX can only be enabled // when the application mode is kVoip. If |force_voip| == true, the // application mode will be forced to kVoip. Otherwise, a failure will be // triggered if current application mode is kAudio. // Input: // - force_voip : whether to force application mode to kVoip. // Return value: // -1 if failed or on codecs other than Opus. // 0 if succeeded. // int EnableOpusDtx(bool force_voip); /////////////////////////////////////////////////////////////////////////// // DisbleOpusDtx() // Disable the DTX, if the codec is Opus. // Return value: // -1 if failed or on codecs other than Opus. // 0 if succeeded. // int DisableOpusDtx(); /////////////////////////////////////////////////////////////////////////// // HasFrameToEncode() // Returns true if there is enough audio buffered for encoding, such that // calling Encode() will return a payload. // bool HasFrameToEncode() const; // Returns a pointer to the AudioDecoder part of a joint encoder-decoder // object, if it exists. Otherwise, nullptr is returned. AudioDecoder* Decoder(); /////////////////////////////////////////////////////////////////////////// // bool HasInternalFEC() // Used to check if the codec has internal FEC. // // Return value: // true if the codec has an internal FEC, e.g. Opus. // false otherwise. // bool HasInternalFEC() const { ReadLockScoped rl(codec_wrapper_lock_); return has_internal_fec_; } /////////////////////////////////////////////////////////////////////////// // int SetFEC(); // Sets the codec internal FEC. No effects on codecs that do not provide // internal FEC. // // Input: // -enable_fec : if true FEC will be enabled otherwise the FEC is // disabled. // // Return value: // -1 if failed, // 0 if succeeded. // int SetFEC(bool enable_fec); /////////////////////////////////////////////////////////////////////////// // int SetPacketLossRate() // Sets expected packet loss rate for encoding. Some encoders provide packet // loss gnostic encoding to make stream less sensitive to packet losses, // through e.g., FEC. No effects on codecs that do not provide such encoding. // // Input: // -loss_rate : expected packet loss rate (0 -- 100 inclusive). // // Return value: // -1 if failed, // 0 if succeeded or packet loss rate is ignored. // int SetPacketLossRate(int /* loss_rate */); // Sets if CopyRed should be enabled. void EnableCopyRed(bool enable, int red_payload_type); // This method is only for testing. const AudioEncoder* GetAudioEncoder() const; private: bool has_internal_fec_ GUARDED_BY(codec_wrapper_lock_); bool copy_red_enabled_ GUARDED_BY(codec_wrapper_lock_); WebRtcACMCodecParams encoder_params_ GUARDED_BY(codec_wrapper_lock_); // Used to lock wrapper internal data // such as buffers and state variables. RWLockWrapper& codec_wrapper_lock_; uint32_t last_timestamp_ GUARDED_BY(codec_wrapper_lock_); uint32_t unique_id_; void ResetAudioEncoder() EXCLUSIVE_LOCKS_REQUIRED(codec_wrapper_lock_); OpusApplicationMode GetOpusApplication(int num_channels, bool enable_dtx) const EXCLUSIVE_LOCKS_REQUIRED(codec_wrapper_lock_); rtc::scoped_ptr audio_encoder_ GUARDED_BY(codec_wrapper_lock_); rtc::scoped_ptr cng_encoder_ GUARDED_BY(codec_wrapper_lock_); rtc::scoped_ptr red_encoder_ GUARDED_BY(codec_wrapper_lock_); AudioEncoder* encoder_ GUARDED_BY(codec_wrapper_lock_); AudioDecoderProxy decoder_proxy_ GUARDED_BY(codec_wrapper_lock_); WebRtcACMCodecParams acm_codec_params_ GUARDED_BY(codec_wrapper_lock_); int bitrate_bps_ GUARDED_BY(codec_wrapper_lock_); bool fec_enabled_ GUARDED_BY(codec_wrapper_lock_); int loss_rate_ GUARDED_BY(codec_wrapper_lock_); int max_playback_rate_hz_ GUARDED_BY(codec_wrapper_lock_); int max_payload_size_bytes_ GUARDED_BY(codec_wrapper_lock_); int max_rate_bps_ GUARDED_BY(codec_wrapper_lock_); bool opus_dtx_enabled_ GUARDED_BY(codec_wrapper_lock_); bool is_opus_ GUARDED_BY(codec_wrapper_lock_); bool is_isac_ GUARDED_BY(codec_wrapper_lock_); bool first_frame_ GUARDED_BY(codec_wrapper_lock_); uint32_t rtp_timestamp_ GUARDED_BY(codec_wrapper_lock_); uint32_t last_rtp_timestamp_ GUARDED_BY(codec_wrapper_lock_); // Map from payload type to CNG sample rate (Hz). std::map cng_pt_ GUARDED_BY(codec_wrapper_lock_); int red_payload_type_ GUARDED_BY(codec_wrapper_lock_); OpusApplicationMode opus_application_ GUARDED_BY(codec_wrapper_lock_); bool opus_application_set_ GUARDED_BY(codec_wrapper_lock_); }; } // namespace acm2 } // namespace webrtc #endif // WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_GENERIC_CODEC_H_