Allow the RTP level indicator computation to work at any sample rate.
Break out the computation to a separate class, and call directly into this from channel.cc rather than going through AudioProcessing. This circumvents AudioProcessing's sample rate limitations. We now compute the RMS over all samples rather than downmixing to a single channel. This makes the call point in channel.cc easier, is more "correct" and should have similar (negligible) complexity. This caused slight changes in the RMS output, so the ApmTest.Process reference has been updated. Snippet of the failing output: [ RUN ] ApmTest.Process Running test 4 of 12... Value of: rms_level Actual: 27 Expected: test->rms_level() Which is: 28 Running test 5 of 12... Value of: rms_level Actual: 26 Expected: test->rms_level() Which is: 27 Running test 6 of 12... Value of: rms_level Actual: 26 Expected: test->rms_level() Which is: 27 Running test 10 of 12... Value of: rms_level Actual: 27 Expected: test->rms_level() Which is: 28 Running test 11 of 12... Value of: rms_level Actual: 26 Expected: test->rms_level() Which is: 27 Running test 12 of 12... Value of: rms_level Actual: 26 Expected: test->rms_level() Which is: 27 BUG=3290 TESTED=Chrome assert is avoided and both voe_cmd_test and apprtc produce reasonable printed out results from RMS(). R=bjornv@webrtc.org Review URL: https://webrtc-codereview.appspot.com/16459004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@6056 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
parent
a0edf4cb04
commit
382c0c209d
Binary file not shown.
@ -70,6 +70,8 @@
|
||||
'noise_suppression_impl.h',
|
||||
'processing_component.cc',
|
||||
'processing_component.h',
|
||||
'rms_level.cc',
|
||||
'rms_level.h',
|
||||
'typing_detection.cc',
|
||||
'typing_detection.h',
|
||||
'utility/delay_estimator.c',
|
||||
|
@ -622,8 +622,7 @@ class LevelEstimator {
|
||||
// frames since the last call to RMS(). The returned value is positive but
|
||||
// should be interpreted as negative. It is constrained to [0, 127].
|
||||
//
|
||||
// The computation follows:
|
||||
// http://tools.ietf.org/html/draft-ietf-avtext-client-to-mixer-audio-level-05
|
||||
// The computation follows: https://tools.ietf.org/html/rfc6465
|
||||
// with the intent that it can provide the RTP audio level indication.
|
||||
//
|
||||
// Frames passed to ProcessStream() with an |_energy| of zero are considered
|
||||
|
@ -10,107 +10,35 @@
|
||||
|
||||
#include "webrtc/modules/audio_processing/level_estimator_impl.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "webrtc/modules/audio_processing/audio_buffer.h"
|
||||
#include "webrtc/modules/audio_processing/include/audio_processing.h"
|
||||
#include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
const float kMaxSquaredLevel = 32768.0 * 32768.0;
|
||||
|
||||
float SumSquare(const int16_t* data, int length) {
|
||||
float sum_square = 0.f;
|
||||
for (int i = 0; i < length; ++i) {
|
||||
sum_square += data[i] * data[i];
|
||||
}
|
||||
return sum_square;
|
||||
}
|
||||
|
||||
class Level {
|
||||
public:
|
||||
static const int kMinLevel = 127;
|
||||
|
||||
Level()
|
||||
: sum_square_(0.0),
|
||||
sample_count_(0) {}
|
||||
~Level() {}
|
||||
|
||||
void Init() {
|
||||
sum_square_ = 0.0;
|
||||
sample_count_ = 0;
|
||||
}
|
||||
|
||||
void Process(const int16_t* data, int length) {
|
||||
assert(data != NULL);
|
||||
assert(length > 0);
|
||||
sum_square_ += SumSquare(data, length);
|
||||
sample_count_ += length;
|
||||
}
|
||||
|
||||
void ProcessMuted(int length) {
|
||||
assert(length > 0);
|
||||
sample_count_ += length;
|
||||
}
|
||||
|
||||
int RMS() {
|
||||
if (sample_count_ == 0 || sum_square_ == 0.0) {
|
||||
Init();
|
||||
return kMinLevel;
|
||||
}
|
||||
|
||||
// Normalize by the max level.
|
||||
float rms = sum_square_ / (sample_count_ * kMaxSquaredLevel);
|
||||
// 20log_10(x^0.5) = 10log_10(x)
|
||||
rms = 10 * log10(rms);
|
||||
if (rms > 0)
|
||||
rms = 0;
|
||||
else if (rms < -kMinLevel)
|
||||
rms = -kMinLevel;
|
||||
|
||||
rms = -rms;
|
||||
Init();
|
||||
return static_cast<int>(rms + 0.5);
|
||||
}
|
||||
|
||||
private:
|
||||
float sum_square_;
|
||||
int sample_count_;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
LevelEstimatorImpl::LevelEstimatorImpl(const AudioProcessing* apm,
|
||||
CriticalSectionWrapper* crit)
|
||||
: ProcessingComponent(),
|
||||
apm_(apm),
|
||||
crit_(crit) {}
|
||||
|
||||
LevelEstimatorImpl::~LevelEstimatorImpl() {}
|
||||
|
||||
int LevelEstimatorImpl::ProcessStream(AudioBuffer* audio) {
|
||||
if (!is_component_enabled()) {
|
||||
return apm_->kNoError;
|
||||
return AudioProcessing::kNoError;
|
||||
}
|
||||
|
||||
Level* level = static_cast<Level*>(handle(0));
|
||||
RMSLevel* rms_level = static_cast<RMSLevel*>(handle(0));
|
||||
if (audio->is_muted()) {
|
||||
level->ProcessMuted(audio->samples_per_channel());
|
||||
return apm_->kNoError;
|
||||
rms_level->ProcessMuted(audio->samples_per_channel() *
|
||||
audio->num_channels());
|
||||
} else {
|
||||
for (int i = 0; i < audio->num_channels(); ++i) {
|
||||
rms_level->Process(audio->data(i), audio->samples_per_channel());
|
||||
}
|
||||
}
|
||||
|
||||
const int16_t* mixed_data = audio->data(0);
|
||||
if (audio->num_channels() > 1) {
|
||||
audio->CopyAndMix(1);
|
||||
mixed_data = audio->mixed_data(0);
|
||||
}
|
||||
|
||||
level->Process(mixed_data, audio->samples_per_channel());
|
||||
|
||||
return apm_->kNoError;
|
||||
return AudioProcessing::kNoError;
|
||||
}
|
||||
|
||||
int LevelEstimatorImpl::Enable(bool enable) {
|
||||
@ -124,42 +52,38 @@ bool LevelEstimatorImpl::is_enabled() const {
|
||||
|
||||
int LevelEstimatorImpl::RMS() {
|
||||
if (!is_component_enabled()) {
|
||||
return apm_->kNotEnabledError;
|
||||
return AudioProcessing::kNotEnabledError;
|
||||
}
|
||||
|
||||
Level* level = static_cast<Level*>(handle(0));
|
||||
return level->RMS();
|
||||
RMSLevel* rms_level = static_cast<RMSLevel*>(handle(0));
|
||||
return rms_level->RMS();
|
||||
}
|
||||
|
||||
// The ProcessingComponent implementation is pretty weird in this class since
|
||||
// we have only a single instance of the trivial underlying component.
|
||||
void* LevelEstimatorImpl::CreateHandle() const {
|
||||
return new Level;
|
||||
return new RMSLevel;
|
||||
}
|
||||
|
||||
void LevelEstimatorImpl::DestroyHandle(void* handle) const {
|
||||
assert(handle != NULL);
|
||||
Level* level = static_cast<Level*>(handle);
|
||||
delete level;
|
||||
delete static_cast<RMSLevel*>(handle);
|
||||
}
|
||||
|
||||
int LevelEstimatorImpl::InitializeHandle(void* handle) const {
|
||||
assert(handle != NULL);
|
||||
Level* level = static_cast<Level*>(handle);
|
||||
level->Init();
|
||||
|
||||
return apm_->kNoError;
|
||||
static_cast<RMSLevel*>(handle)->Reset();
|
||||
return AudioProcessing::kNoError;
|
||||
}
|
||||
|
||||
int LevelEstimatorImpl::ConfigureHandle(void* /*handle*/) const {
|
||||
return apm_->kNoError;
|
||||
return AudioProcessing::kNoError;
|
||||
}
|
||||
|
||||
int LevelEstimatorImpl::num_handles_required() const {
|
||||
return 1;
|
||||
}
|
||||
|
||||
int LevelEstimatorImpl::GetHandleError(void* handle) const {
|
||||
// The component has no detailed errors.
|
||||
assert(handle != NULL);
|
||||
return apm_->kUnspecifiedError;
|
||||
int LevelEstimatorImpl::GetHandleError(void* /*handle*/) const {
|
||||
return AudioProcessing::kUnspecifiedError;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
@ -13,6 +13,7 @@
|
||||
|
||||
#include "webrtc/modules/audio_processing/include/audio_processing.h"
|
||||
#include "webrtc/modules/audio_processing/processing_component.h"
|
||||
#include "webrtc/modules/audio_processing/rms_level.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
@ -44,7 +45,6 @@ class LevelEstimatorImpl : public LevelEstimator,
|
||||
virtual int num_handles_required() const OVERRIDE;
|
||||
virtual int GetHandleError(void* handle) const OVERRIDE;
|
||||
|
||||
const AudioProcessing* apm_;
|
||||
CriticalSectionWrapper* crit_;
|
||||
};
|
||||
|
||||
|
61
webrtc/modules/audio_processing/rms_level.cc
Normal file
61
webrtc/modules/audio_processing/rms_level.cc
Normal file
@ -0,0 +1,61 @@
|
||||
/*
 *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "webrtc/modules/audio_processing/rms_level.h"

#include <assert.h>
#include <math.h>

namespace webrtc {

// Squared magnitude of a full-scale 16-bit sample (-32768).
static const float kMaxSquaredLevel = 32768.0 * 32768.0;

RMSLevel::RMSLevel()
    : sum_square_(0.0),
      sample_count_(0) {}

RMSLevel::~RMSLevel() {}

// Clears the accumulated energy and sample count.
void RMSLevel::Reset() {
  sum_square_ = 0.0;
  sample_count_ = 0;
}

// Accumulates the sum of squared samples for |length| samples of |data|.
void RMSLevel::Process(const int16_t* data, int length) {
  for (int i = 0; i < length; ++i) {
    sum_square_ += data[i] * data[i];
  }
  sample_count_ += length;
}

// Accounts for |length| samples of silence without touching the energy sum.
void RMSLevel::ProcessMuted(int length) {
  sample_count_ += length;
}

// Returns the RMS level in [0, kMinLevel], where the value should be read as
// a negative dBFS figure (e.g. 6 means -6 dBFS). Resets the accumulators so
// each call covers only the audio seen since the previous call.
int RMSLevel::RMS() {
  if (sample_count_ == 0 || sum_square_ == 0.0) {
    // No data, or pure silence: report the minimum level per RFC 6465.
    Reset();
    return kMinLevel;
  }

  // Normalize by the max level.
  float rms = sum_square_ / (sample_count_ * kMaxSquaredLevel);
  // 20log_10(x^0.5) = 10log_10(x)
  rms = 10 * log10(rms);
  assert(rms <= 0);
  if (rms > 0) {
    // Float accumulation in |sum_square_| can nudge the normalized ratio
    // marginally above 1.0 for full-scale input; clamp so release builds
    // (where the assert above compiles away) stay in range.
    rms = 0;
  } else if (rms < -kMinLevel) {
    rms = -kMinLevel;
  }

  rms = -rms;
  Reset();
  return static_cast<int>(rms + 0.5);
}

}  // namespace webrtc
|
51
webrtc/modules/audio_processing/rms_level.h
Normal file
51
webrtc/modules/audio_processing/rms_level.h
Normal file
@ -0,0 +1,51 @@
|
||||
/*
 *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_RMS_LEVEL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_RMS_LEVEL_H_

#include "webrtc/typedefs.h"

namespace webrtc {

// Computes the root mean square (RMS) level in dBFS (decibels from digital
// full-scale) of audio data. The computation follows RFC 6465:
// https://tools.ietf.org/html/rfc6465
// with the intent that it can provide the RTP audio level indication.
//
// The expected approach is to provide constant-sized chunks of audio to
// Process(). When enough chunks have been accumulated to form a packet, call
// RMS() to get the audio level indicator for the RTP header.
class RMSLevel {
 public:
  // Lowest reportable level (interpreted as -127 dBFS); also returned for
  // silence or when no data has been processed.
  static const int kMinLevel = 127;

  RMSLevel();
  ~RMSLevel();

  // Can be called to reset internal states, but is not required during normal
  // operation.
  void Reset();

  // Pass each chunk of audio to Process() to accumulate the level.
  void Process(const int16_t* data, int length);

  // If all samples with the given |length| have a magnitude of zero, this is
  // a shortcut to avoid some computation.
  void ProcessMuted(int length);

  // Computes the RMS level over all data passed to Process() since the last
  // call to RMS(). The returned value is positive but should be interpreted as
  // negative as per the RFC. It is constrained to [0, 127].
  int RMS();

 private:
  float sum_square_;   // Running sum of squared sample values.
  int sample_count_;   // Number of samples accumulated (including muted ones).
};

}  // namespace webrtc

#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_RMS_LEVEL_H_
|
@ -106,7 +106,7 @@ Channel::SendData(FrameType frameType,
|
||||
// Store current audio level in the RTP/RTCP module.
|
||||
// The level will be used in combination with voice-activity state
|
||||
// (frameType) to add an RTP header extension
|
||||
_rtpRtcpModule->SetAudioLevel(rtp_audioproc_->level_estimator()->RMS());
|
||||
_rtpRtcpModule->SetAudioLevel(rms_level_.RMS());
|
||||
}
|
||||
|
||||
// Push data from ACM to RTP/RTCP-module to deliver audio frame for
|
||||
@ -3220,20 +3220,7 @@ Channel::GetRemoteCSRCs(unsigned int arrCSRC[15])
|
||||
}
|
||||
|
||||
int Channel::SetSendAudioLevelIndicationStatus(bool enable, unsigned char id) {
|
||||
if (rtp_audioproc_.get() == NULL) {
|
||||
rtp_audioproc_.reset(AudioProcessing::Create(VoEModuleId(_instanceId,
|
||||
_channelId)));
|
||||
}
|
||||
|
||||
if (rtp_audioproc_->level_estimator()->Enable(enable) !=
|
||||
AudioProcessing::kNoError) {
|
||||
_engineStatisticsPtr->SetLastError(VE_APM_ERROR, kTraceError,
|
||||
"Failed to enable AudioProcessing::level_estimator()");
|
||||
return -1;
|
||||
}
|
||||
|
||||
_includeAudioLevelIndication = enable;
|
||||
|
||||
return SetSendRtpHeaderExtension(enable, kRtpExtensionAudioLevel, id);
|
||||
}
|
||||
|
||||
@ -3936,12 +3923,8 @@ Channel::PrepareEncodeAndSend(int mixingFrequency)
|
||||
InsertInbandDtmfTone();
|
||||
|
||||
if (_includeAudioLevelIndication) {
|
||||
// Performs level analysis only; does not affect the signal.
|
||||
int err = rtp_audioproc_->ProcessStream(&_audioFrame);
|
||||
if (err) {
|
||||
LOG(LS_ERROR) << "ProcessStream() error: " << err;
|
||||
assert(false);
|
||||
}
|
||||
int length = _audioFrame.samples_per_channel_ * _audioFrame.num_channels_;
|
||||
rms_level_.Process(_audioFrame.data_, length);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -8,13 +8,14 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_VOICE_ENGINE_CHANNEL_H
|
||||
#define WEBRTC_VOICE_ENGINE_CHANNEL_H
|
||||
#ifndef WEBRTC_VOICE_ENGINE_CHANNEL_H_
|
||||
#define WEBRTC_VOICE_ENGINE_CHANNEL_H_
|
||||
|
||||
#include "webrtc/common_audio/resampler/include/push_resampler.h"
|
||||
#include "webrtc/common_types.h"
|
||||
#include "webrtc/modules/audio_coding/main/interface/audio_coding_module.h"
|
||||
#include "webrtc/modules/audio_conference_mixer/interface/audio_conference_mixer_defines.h"
|
||||
#include "webrtc/modules/audio_processing/rms_level.h"
|
||||
#include "webrtc/modules/rtp_rtcp/interface/rtp_header_parser.h"
|
||||
#include "webrtc/modules/rtp_rtcp/interface/rtp_rtcp.h"
|
||||
#include "webrtc/modules/utility/interface/file_player.h"
|
||||
@ -556,7 +557,7 @@ private:
|
||||
VoiceEngineObserver* _voiceEngineObserverPtr; // owned by base
|
||||
CriticalSectionWrapper* _callbackCritSectPtr; // owned by base
|
||||
Transport* _transportPtr; // WebRtc socket or external transport
|
||||
scoped_ptr<AudioProcessing> rtp_audioproc_;
|
||||
RMSLevel rms_level_;
|
||||
scoped_ptr<AudioProcessing> rx_audioproc_; // far end AudioProcessing
|
||||
VoERxVadCallback* _rxVadObserverPtr;
|
||||
int32_t _oldVadDecision;
|
||||
@ -606,4 +607,4 @@ private:
|
||||
} // namespace voe
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_VOICE_ENGINE_CHANNEL_H
|
||||
#endif // WEBRTC_VOICE_ENGINE_CHANNEL_H_
|
||||
|
Loading…
x
Reference in New Issue
Block a user