diff --git a/data/audio_processing/output_data_float.pb b/data/audio_processing/output_data_float.pb
index b63ff9253..51346afbc 100644
Binary files a/data/audio_processing/output_data_float.pb and b/data/audio_processing/output_data_float.pb differ
diff --git a/webrtc/modules/audio_processing/audio_processing.gypi b/webrtc/modules/audio_processing/audio_processing.gypi
index bbb833704..c73bb2a47 100644
--- a/webrtc/modules/audio_processing/audio_processing.gypi
+++ b/webrtc/modules/audio_processing/audio_processing.gypi
@@ -70,6 +70,8 @@
         'noise_suppression_impl.h',
         'processing_component.cc',
         'processing_component.h',
+        'rms_level.cc',
+        'rms_level.h',
         'typing_detection.cc',
         'typing_detection.h',
         'utility/delay_estimator.c',
diff --git a/webrtc/modules/audio_processing/include/audio_processing.h b/webrtc/modules/audio_processing/include/audio_processing.h
index 6db1d12bc..15e01b95f 100644
--- a/webrtc/modules/audio_processing/include/audio_processing.h
+++ b/webrtc/modules/audio_processing/include/audio_processing.h
@@ -622,8 +622,7 @@ class LevelEstimator {
   // frames since the last call to RMS(). The returned value is positive but
   // should be interpreted as negative. It is constrained to [0, 127].
   //
-  // The computation follows:
-  // http://tools.ietf.org/html/draft-ietf-avtext-client-to-mixer-audio-level-05
+  // The computation follows: https://tools.ietf.org/html/rfc6465
   // with the intent that it can provide the RTP audio level indication.
   //
   // Frames passed to ProcessStream() with an |_energy| of zero are considered
diff --git a/webrtc/modules/audio_processing/level_estimator_impl.cc b/webrtc/modules/audio_processing/level_estimator_impl.cc
index a91e96300..d209d4078 100644
--- a/webrtc/modules/audio_processing/level_estimator_impl.cc
+++ b/webrtc/modules/audio_processing/level_estimator_impl.cc
@@ -10,107 +10,35 @@
 #include "webrtc/modules/audio_processing/level_estimator_impl.h"
 
-#include <assert.h>
-#include <math.h>
-#include <string.h>
-
 #include "webrtc/modules/audio_processing/audio_buffer.h"
+#include "webrtc/modules/audio_processing/include/audio_processing.h"
 #include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
 
 namespace webrtc {
-namespace {
-
-const float kMaxSquaredLevel = 32768.0 * 32768.0;
-
-float SumSquare(const int16_t* data, int length) {
-  float sum_square = 0.f;
-  for (int i = 0; i < length; ++i) {
-    sum_square += data[i] * data[i];
-  }
-  return sum_square;
-}
-
-class Level {
- public:
-  static const int kMinLevel = 127;
-
-  Level()
-      : sum_square_(0.0),
-        sample_count_(0) {}
-  ~Level() {}
-
-  void Init() {
-    sum_square_ = 0.0;
-    sample_count_ = 0;
-  }
-
-  void Process(const int16_t* data, int length) {
-    assert(data != NULL);
-    assert(length > 0);
-    sum_square_ += SumSquare(data, length);
-    sample_count_ += length;
-  }
-
-  void ProcessMuted(int length) {
-    assert(length > 0);
-    sample_count_ += length;
-  }
-
-  int RMS() {
-    if (sample_count_ == 0 || sum_square_ == 0.0) {
-      Init();
-      return kMinLevel;
-    }
-
-    // Normalize by the max level.
-    float rms = sum_square_ / (sample_count_ * kMaxSquaredLevel);
-    // 20log_10(x^0.5) = 10log_10(x)
-    rms = 10 * log10(rms);
-    if (rms > 0)
-      rms = 0;
-    else if (rms < -kMinLevel)
-      rms = -kMinLevel;
-
-    rms = -rms;
-    Init();
-    return static_cast<int>(rms + 0.5);
-  }
-
- private:
-  float sum_square_;
-  int sample_count_;
-};
-
-}  // namespace
 
 LevelEstimatorImpl::LevelEstimatorImpl(const AudioProcessing* apm,
                                        CriticalSectionWrapper* crit)
   : ProcessingComponent(),
-    apm_(apm),
     crit_(crit) {}
 
 LevelEstimatorImpl::~LevelEstimatorImpl() {}
 
 int LevelEstimatorImpl::ProcessStream(AudioBuffer* audio) {
   if (!is_component_enabled()) {
-    return apm_->kNoError;
+    return AudioProcessing::kNoError;
   }
 
-  Level* level = static_cast<Level*>(handle(0));
+  RMSLevel* rms_level = static_cast<RMSLevel*>(handle(0));
   if (audio->is_muted()) {
-    level->ProcessMuted(audio->samples_per_channel());
-    return apm_->kNoError;
+    rms_level->ProcessMuted(audio->samples_per_channel() *
+                            audio->num_channels());
+  } else {
+    for (int i = 0; i < audio->num_channels(); ++i) {
+      rms_level->Process(audio->data(i), audio->samples_per_channel());
+    }
   }
 
-  const int16_t* mixed_data = audio->data(0);
-  if (audio->num_channels() > 1) {
-    audio->CopyAndMix(1);
-    mixed_data = audio->mixed_data(0);
-  }
-
-  level->Process(mixed_data, audio->samples_per_channel());
-
-  return apm_->kNoError;
+  return AudioProcessing::kNoError;
 }
 
 int LevelEstimatorImpl::Enable(bool enable) {
@@ -124,42 +52,38 @@ bool LevelEstimatorImpl::is_enabled() const {
 
 int LevelEstimatorImpl::RMS() {
   if (!is_component_enabled()) {
-    return apm_->kNotEnabledError;
+    return AudioProcessing::kNotEnabledError;
   }
 
-  Level* level = static_cast<Level*>(handle(0));
-  return level->RMS();
+  RMSLevel* rms_level = static_cast<RMSLevel*>(handle(0));
+  return rms_level->RMS();
 }
 
+// The ProcessingComponent implementation is pretty weird in this class since
+// we have only a single instance of the trivial underlying component.
 void* LevelEstimatorImpl::CreateHandle() const {
-  return new Level;
+  return new RMSLevel;
 }
 
 void LevelEstimatorImpl::DestroyHandle(void* handle) const {
-  assert(handle != NULL);
-  Level* level = static_cast<Level*>(handle);
-  delete level;
+  delete static_cast<RMSLevel*>(handle);
 }
 
 int LevelEstimatorImpl::InitializeHandle(void* handle) const {
-  assert(handle != NULL);
-  Level* level = static_cast<Level*>(handle);
-  level->Init();
-
-  return apm_->kNoError;
+  static_cast<RMSLevel*>(handle)->Reset();
+  return AudioProcessing::kNoError;
 }
 
 int LevelEstimatorImpl::ConfigureHandle(void* /*handle*/) const {
-  return apm_->kNoError;
+  return AudioProcessing::kNoError;
 }
 
 int LevelEstimatorImpl::num_handles_required() const {
   return 1;
 }
 
-int LevelEstimatorImpl::GetHandleError(void* handle) const {
-  // The component has no detailed errors.
-  assert(handle != NULL);
-  return apm_->kUnspecifiedError;
+int LevelEstimatorImpl::GetHandleError(void* /*handle*/) const {
+  return AudioProcessing::kUnspecifiedError;
 }
+
 }  // namespace webrtc
diff --git a/webrtc/modules/audio_processing/level_estimator_impl.h b/webrtc/modules/audio_processing/level_estimator_impl.h
index bb5665f37..b38337d4d 100644
--- a/webrtc/modules/audio_processing/level_estimator_impl.h
+++ b/webrtc/modules/audio_processing/level_estimator_impl.h
@@ -13,6 +13,7 @@
 
 #include "webrtc/modules/audio_processing/include/audio_processing.h"
 #include "webrtc/modules/audio_processing/processing_component.h"
+#include "webrtc/modules/audio_processing/rms_level.h"
 
 namespace webrtc {
 
@@ -44,7 +45,6 @@ class LevelEstimatorImpl : public LevelEstimator,
   virtual int num_handles_required() const OVERRIDE;
   virtual int GetHandleError(void* handle) const OVERRIDE;
 
-  const AudioProcessing* apm_;
   CriticalSectionWrapper* crit_;
 };
 
diff --git a/webrtc/modules/audio_processing/rms_level.cc b/webrtc/modules/audio_processing/rms_level.cc
new file mode 100644
index 000000000..963622b81
--- /dev/null
+++ b/webrtc/modules/audio_processing/rms_level.cc
@@ -0,0 +1,61 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/rms_level.h"
+
+#include <assert.h>
+#include <math.h>
+
+namespace webrtc {
+
+static const float kMaxSquaredLevel = 32768.0 * 32768.0;
+
+RMSLevel::RMSLevel()
+    : sum_square_(0.0),
+      sample_count_(0) {}
+
+RMSLevel::~RMSLevel() {}
+
+void RMSLevel::Reset() {
+  sum_square_ = 0.0;
+  sample_count_ = 0;
+}
+
+void RMSLevel::Process(const int16_t* data, int length) {
+  for (int i = 0; i < length; ++i) {
+    sum_square_ += data[i] * data[i];
+  }
+  sample_count_ += length;
+}
+
+void RMSLevel::ProcessMuted(int length) {
+  sample_count_ += length;
+}
+
+int RMSLevel::RMS() {
+  if (sample_count_ == 0 || sum_square_ == 0.0) {
+    Reset();
+    return kMinLevel;
+  }
+
+  // Normalize by the max level.
+  float rms = sum_square_ / (sample_count_ * kMaxSquaredLevel);
+  // 20log_10(x^0.5) = 10log_10(x)
+  rms = 10 * log10(rms);
+  assert(rms <= 0);
+  if (rms < -kMinLevel)
+    rms = -kMinLevel;
+
+  rms = -rms;
+  Reset();
+  return static_cast<int>(rms + 0.5);
+}
+
+}  // namespace webrtc
diff --git a/webrtc/modules/audio_processing/rms_level.h b/webrtc/modules/audio_processing/rms_level.h
new file mode 100644
index 000000000..1b19803ba
--- /dev/null
+++ b/webrtc/modules/audio_processing/rms_level.h
@@ -0,0 +1,56 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_RMS_LEVEL_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_RMS_LEVEL_H_
+
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+// Computes the root mean square (RMS) level in dBFs (decibels from digital
+// full-scale) of audio data. The computation follows RFC 6465:
+// https://tools.ietf.org/html/rfc6465
+// with the intent that it can provide the RTP audio level indication.
+//
+// The expected approach is to provide constant-sized chunks of audio to
+// Process(). When enough chunks have been accumulated to form a packet, call
+// RMS() to get the audio level indicator for the RTP header.
+class RMSLevel {
+ public:
+  static const int kMinLevel = 127;
+
+  RMSLevel();
+  ~RMSLevel();
+
+  // Can be called to reset internal states, but is not required during normal
+  // operation.
+  void Reset();
+
+  // Pass each chunk of audio to Process() to accumulate the level.
+  void Process(const int16_t* data, int length);
+
+  // If all samples with the given |length| have a magnitude of zero, this is
+  // a shortcut to avoid some computation.
+  void ProcessMuted(int length);
+
+  // Computes the RMS level over all data passed to Process() since the last
+  // call to RMS(). The returned value is positive but should be interpreted as
+  // negative as per the RFC. It is constrained to [0, 127].
+  int RMS();
+
+ private:
+  float sum_square_;
+  int sample_count_;
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_RMS_LEVEL_H_
diff --git a/webrtc/voice_engine/channel.cc b/webrtc/voice_engine/channel.cc
index b3c53f854..ebb3290bb 100644
--- a/webrtc/voice_engine/channel.cc
+++ b/webrtc/voice_engine/channel.cc
@@ -106,7 +106,7 @@ Channel::SendData(FrameType frameType,
         // Store current audio level in the RTP/RTCP module.
         // The level will be used in combination with voice-activity state
        // (frameType) to add an RTP header extension
-        _rtpRtcpModule->SetAudioLevel(rtp_audioproc_->level_estimator()->RMS());
+        _rtpRtcpModule->SetAudioLevel(rms_level_.RMS());
     }
 
     // Push data from ACM to RTP/RTCP-module to deliver audio frame for
@@ -3220,20 +3220,7 @@ Channel::GetRemoteCSRCs(unsigned int arrCSRC[15])
 }
 
 int Channel::SetSendAudioLevelIndicationStatus(bool enable, unsigned char id) {
-  if (rtp_audioproc_.get() == NULL) {
-    rtp_audioproc_.reset(AudioProcessing::Create(VoEModuleId(_instanceId,
-                                                             _channelId)));
-  }
-
-  if (rtp_audioproc_->level_estimator()->Enable(enable) !=
-      AudioProcessing::kNoError) {
-    _engineStatisticsPtr->SetLastError(VE_APM_ERROR, kTraceError,
-        "Failed to enable AudioProcessing::level_estimator()");
-    return -1;
-  }
-
   _includeAudioLevelIndication = enable;
-
   return SetSendRtpHeaderExtension(enable, kRtpExtensionAudioLevel, id);
 }
 
@@ -3936,12 +3923,8 @@ Channel::PrepareEncodeAndSend(int mixingFrequency)
     InsertInbandDtmfTone();
 
     if (_includeAudioLevelIndication) {
-        // Performs level analysis only; does not affect the signal.
-        int err = rtp_audioproc_->ProcessStream(&_audioFrame);
-        if (err) {
-            LOG(LS_ERROR) << "ProcessStream() error: " << err;
-            assert(false);
-        }
+        int length = _audioFrame.samples_per_channel_ * _audioFrame.num_channels_;
+        rms_level_.Process(_audioFrame.data_, length);
     }
 
     return 0;
diff --git a/webrtc/voice_engine/channel.h b/webrtc/voice_engine/channel.h
index 8166f1c27..e605986d0 100644
--- a/webrtc/voice_engine/channel.h
+++ b/webrtc/voice_engine/channel.h
@@ -8,13 +8,14 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#ifndef WEBRTC_VOICE_ENGINE_CHANNEL_H
-#define WEBRTC_VOICE_ENGINE_CHANNEL_H
+#ifndef WEBRTC_VOICE_ENGINE_CHANNEL_H_
+#define WEBRTC_VOICE_ENGINE_CHANNEL_H_
 
 #include "webrtc/common_audio/resampler/include/push_resampler.h"
 #include "webrtc/common_types.h"
 #include "webrtc/modules/audio_coding/main/interface/audio_coding_module.h"
 #include "webrtc/modules/audio_conference_mixer/interface/audio_conference_mixer_defines.h"
+#include "webrtc/modules/audio_processing/rms_level.h"
 #include "webrtc/modules/rtp_rtcp/interface/rtp_header_parser.h"
 #include "webrtc/modules/rtp_rtcp/interface/rtp_rtcp.h"
 #include "webrtc/modules/utility/interface/file_player.h"
@@ -556,7 +557,7 @@ private:
     VoiceEngineObserver* _voiceEngineObserverPtr;  // owned by base
     CriticalSectionWrapper* _callbackCritSectPtr;  // owned by base
     Transport* _transportPtr;  // WebRtc socket or external transport
-    scoped_ptr<AudioProcessing> rtp_audioproc_;
+    RMSLevel rms_level_;
    scoped_ptr<AudioProcessing> rx_audioproc_;  // far end AudioProcessing
     VoERxVadCallback* _rxVadObserverPtr;
     int32_t _oldVadDecision;
@@ -606,4 +607,4 @@ private:
 }  // namespace voe
 }  // namespace webrtc
 
-#endif  // WEBRTC_VOICE_ENGINE_CHANNEL_H
+#endif  // WEBRTC_VOICE_ENGINE_CHANNEL_H_