Add RMS computation for the RTP level indicator.

- Compute RMS in Channel, over a packet's worth of audio about to be sent, rather than over the captured audio in TransmitMixer.
- We now use the entire packet rather than the last 10 ms frame.
- Restore functionality to LevelEstimator.
- Fix a bug in the splitting filter.
- Fix a number of bugs in process_test related to a poorly named
  AudioFrame member.
- Update the unittest protobuf and float reference output.
- Add audioproc unittests.
- Reenable voe_extended_tests, and add a real function test.
- Use correct minimum level of 127.

TEST=audioproc_unittest, audioproc, voe_extended_test, voe_auto_test

Review URL: http://webrtc-codereview.appspot.com/279003

git-svn-id: http://webrtc.googlecode.com/svn/trunk@950 4adac7df-926f-26a2-2b94-8c16560cd09d
andrew@webrtc.org 2011-11-15 16:57:56 +00:00
parent 6a85b17a0a
commit 755b04a06e
19 changed files with 967 additions and 641 deletions

View File

@@ -49,6 +49,7 @@ void CalculateEnergy(AudioFrame& audioFrame)
     for(int position = 0; position < audioFrame._payloadDataLengthInSamples;
         position++)
     {
+        // TODO(andrew): this can easily overflow.
         audioFrame._energy += audioFrame._payloadData[position] *
                               audioFrame._payloadData[position];
     }
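The TODO above notes that summing squared 16-bit samples into the frame's energy field can overflow. A minimal sketch of an overflow-safe variant, assuming a 64-bit accumulator is acceptable and that the energy field is 32 bits wide (illustrative only, not part of this change; SaturatedEnergy is a hypothetical helper):

#include <stddef.h>
#include <stdint.h>

// Accumulates the sum of squared samples in 64 bits, then saturates to the
// 32-bit range assumed for the frame's energy field.
uint32_t SaturatedEnergy(const int16_t* samples, size_t length) {
  uint64_t energy = 0;
  for (size_t i = 0; i < length; ++i) {
    // Each term is at most 32768^2, so the 64-bit sum cannot overflow for any
    // realistic frame length.
    energy += static_cast<uint64_t>(static_cast<int32_t>(samples[i]) * samples[i]);
  }
  return energy > 0xFFFFFFFFull ? 0xFFFFFFFFu : static_cast<uint32_t>(energy);
}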

View File

@@ -10,6 +10,8 @@
 #include "audio_buffer.h"
 
+#include "signal_processing_library.h"
+
 namespace webrtc {
 namespace {
@@ -19,18 +21,14 @@ enum {
   kSamplesPer32kHzChannel = 320
 };
 
-void StereoToMono(const WebRtc_Word16* left, const WebRtc_Word16* right,
-                  WebRtc_Word16* out, int samples_per_channel) {
-  WebRtc_Word32 data_int32 = 0;
+void StereoToMono(const int16_t* left, const int16_t* right,
+                  int16_t* out, int samples_per_channel) {
+  assert(left != NULL && right != NULL && out != NULL);
+
   for (int i = 0; i < samples_per_channel; i++) {
-    data_int32 = (left[i] + right[i]) >> 1;
-    if (data_int32 > 32767) {
-      data_int32 = 32767;
-    } else if (data_int32 < -32768) {
-      data_int32 = -32768;
-    }
-
-    out[i] = static_cast<WebRtc_Word16>(data_int32);
+    int32_t data32 = (static_cast<int32_t>(left[i]) +
+                      static_cast<int32_t>(right[i])) >> 1;
+
+    out[i] = WebRtcSpl_SatW32ToW16(data32);
   }
 }
 }  // namespace
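For readers without the SPL headers at hand, the saturating conversion used above clamps a 32-bit intermediate value into the 16-bit sample range. A sketch of that behavior, inferred from the function's name and its use here rather than from its source (illustrative only):

#include <stdint.h>

// Approximate behavior of WebRtcSpl_SatW32ToW16 (assumption, for illustration).
int16_t SatW32ToW16(int32_t value) {
  if (value > 32767) return 32767;
  if (value < -32768) return -32768;
  return static_cast<int16_t>(value);
}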
@@ -40,7 +38,7 @@ struct AudioChannel {
     memset(data, 0, sizeof(data));
   }
 
-  WebRtc_Word16 data[kSamplesPer32kHzChannel];
+  int16_t data[kSamplesPer32kHzChannel];
 };
 
 struct SplitAudioChannel {
@@ -53,8 +51,8 @@ struct SplitAudioChannel {
     memset(synthesis_filter_state2, 0, sizeof(synthesis_filter_state2));
   }
 
-  WebRtc_Word16 low_pass_data[kSamplesPer16kHzChannel];
-  WebRtc_Word16 high_pass_data[kSamplesPer16kHzChannel];
+  int16_t low_pass_data[kSamplesPer16kHzChannel];
+  int16_t high_pass_data[kSamplesPer16kHzChannel];
 
   WebRtc_Word32 analysis_filter_state1[6];
   WebRtc_Word32 analysis_filter_state2[6];
@@ -69,46 +67,34 @@ AudioBuffer::AudioBuffer(int max_num_channels,
       num_channels_(0),
       num_mixed_channels_(0),
       num_mixed_low_pass_channels_(0),
+      data_was_mixed_(false),
       samples_per_channel_(samples_per_channel),
       samples_per_split_channel_(samples_per_channel),
       reference_copied_(false),
       activity_(AudioFrame::kVadUnknown),
+      is_muted_(false),
       data_(NULL),
       channels_(NULL),
       split_channels_(NULL),
+      mixed_channels_(NULL),
       mixed_low_pass_channels_(NULL),
       low_pass_reference_channels_(NULL) {
   if (max_num_channels_ > 1) {
-    channels_ = new AudioChannel[max_num_channels_];
-    mixed_low_pass_channels_ = new AudioChannel[max_num_channels_];
+    channels_.reset(new AudioChannel[max_num_channels_]);
+    mixed_channels_.reset(new AudioChannel[max_num_channels_]);
+    mixed_low_pass_channels_.reset(new AudioChannel[max_num_channels_]);
   }
-  low_pass_reference_channels_ = new AudioChannel[max_num_channels_];
+  low_pass_reference_channels_.reset(new AudioChannel[max_num_channels_]);
 
   if (samples_per_channel_ == kSamplesPer32kHzChannel) {
-    split_channels_ = new SplitAudioChannel[max_num_channels_];
+    split_channels_.reset(new SplitAudioChannel[max_num_channels_]);
     samples_per_split_channel_ = kSamplesPer16kHzChannel;
   }
 }
 
-AudioBuffer::~AudioBuffer() {
-  if (channels_ != NULL) {
-    delete [] channels_;
-  }
-  if (mixed_low_pass_channels_ != NULL) {
-    delete [] mixed_low_pass_channels_;
-  }
-  if (low_pass_reference_channels_ != NULL) {
-    delete [] low_pass_reference_channels_;
-  }
-  if (split_channels_ != NULL) {
-    delete [] split_channels_;
-  }
-}
+AudioBuffer::~AudioBuffer() {}
 
-WebRtc_Word16* AudioBuffer::data(int channel) const {
+int16_t* AudioBuffer::data(int channel) const {
   assert(channel >= 0 && channel < num_channels_);
   if (data_ != NULL) {
     return data_;
@@ -117,31 +103,37 @@ WebRtc_Word16* AudioBuffer::data(int channel) const {
   return channels_[channel].data;
 }
 
-WebRtc_Word16* AudioBuffer::low_pass_split_data(int channel) const {
+int16_t* AudioBuffer::low_pass_split_data(int channel) const {
   assert(channel >= 0 && channel < num_channels_);
-  if (split_channels_ == NULL) {
+  if (split_channels_.get() == NULL) {
     return data(channel);
   }
 
   return split_channels_[channel].low_pass_data;
 }
 
-WebRtc_Word16* AudioBuffer::high_pass_split_data(int channel) const {
+int16_t* AudioBuffer::high_pass_split_data(int channel) const {
   assert(channel >= 0 && channel < num_channels_);
-  if (split_channels_ == NULL) {
+  if (split_channels_.get() == NULL) {
     return NULL;
   }
 
   return split_channels_[channel].high_pass_data;
 }
 
-WebRtc_Word16* AudioBuffer::mixed_low_pass_data(int channel) const {
+int16_t* AudioBuffer::mixed_data(int channel) const {
+  assert(channel >= 0 && channel < num_mixed_channels_);
+
+  return mixed_channels_[channel].data;
+}
+
+int16_t* AudioBuffer::mixed_low_pass_data(int channel) const {
   assert(channel >= 0 && channel < num_mixed_low_pass_channels_);
 
   return mixed_low_pass_channels_[channel].data;
 }
 
-WebRtc_Word16* AudioBuffer::low_pass_reference(int channel) const {
+int16_t* AudioBuffer::low_pass_reference(int channel) const {
   assert(channel >= 0 && channel < num_channels_);
   if (!reference_copied_) {
     return NULL;
@@ -174,10 +166,14 @@ void AudioBuffer::set_activity(AudioFrame::VADActivity activity) {
   activity_ = activity;
 }
 
-AudioFrame::VADActivity AudioBuffer::activity() {
+AudioFrame::VADActivity AudioBuffer::activity() const {
   return activity_;
 }
 
+bool AudioBuffer::is_muted() const {
+  return is_muted_;
+}
+
 int AudioBuffer::num_channels() const {
   return num_channels_;
 }
@@ -196,10 +192,15 @@ void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
   assert(frame->_payloadDataLengthInSamples == samples_per_channel_);
 
   num_channels_ = frame->_audioChannel;
+  data_was_mixed_ = false;
   num_mixed_channels_ = 0;
   num_mixed_low_pass_channels_ = 0;
   reference_copied_ = false;
   activity_ = frame->_vadActivity;
+  is_muted_ = false;
+  if (frame->_energy == 0) {
+    is_muted_ = true;
+  }
 
   if (num_channels_ == 1) {
     // We can get away with a pointer assignment in this case.
@@ -207,9 +208,9 @@ void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
     return;
   }
 
-  WebRtc_Word16* interleaved = frame->_payloadData;
+  int16_t* interleaved = frame->_payloadData;
   for (int i = 0; i < num_channels_; i++) {
-    WebRtc_Word16* deinterleaved = channels_[i].data;
+    int16_t* deinterleaved = channels_[i].data;
     int interleaved_idx = i;
     for (int j = 0; j < samples_per_channel_; j++) {
       deinterleaved[j] = interleaved[interleaved_idx];
@@ -218,16 +219,20 @@ void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
   }
 }
-void AudioBuffer::InterleaveTo(AudioFrame* frame) const {
+void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) const {
   assert(frame->_audioChannel == num_channels_);
   assert(frame->_payloadDataLengthInSamples == samples_per_channel_);
   frame->_vadActivity = activity_;
 
+  if (!data_changed) {
+    return;
+  }
+
   if (num_channels_ == 1) {
-    if (num_mixed_channels_ == 1) {
+    if (data_was_mixed_) {
       memcpy(frame->_payloadData,
              channels_[0].data,
-             sizeof(WebRtc_Word16) * samples_per_channel_);
+             sizeof(int16_t) * samples_per_channel_);
     } else {
       // These should point to the same buffer in this case.
       assert(data_ == frame->_payloadData);
@@ -236,9 +241,9 @@ void AudioBuffer::InterleaveTo(AudioFrame* frame) const {
     return;
   }
 
-  WebRtc_Word16* interleaved = frame->_payloadData;
+  int16_t* interleaved = frame->_payloadData;
   for (int i = 0; i < num_channels_; i++) {
-    WebRtc_Word16* deinterleaved = channels_[i].data;
+    int16_t* deinterleaved = channels_[i].data;
     int interleaved_idx = i;
     for (int j = 0; j < samples_per_channel_; j++) {
       interleaved[interleaved_idx] = deinterleaved[j];
@@ -261,6 +266,19 @@ void AudioBuffer::Mix(int num_mixed_channels) {
                samples_per_channel_);
 
   num_channels_ = num_mixed_channels;
+  data_was_mixed_ = true;
+}
+
+void AudioBuffer::CopyAndMix(int num_mixed_channels) {
+  // We currently only support the stereo to mono case.
+  assert(num_channels_ == 2);
+  assert(num_mixed_channels == 1);
+
+  StereoToMono(channels_[0].data,
+               channels_[1].data,
+               mixed_channels_[0].data,
+               samples_per_channel_);
+
   num_mixed_channels_ = num_mixed_channels;
 }
 
@@ -282,7 +300,7 @@ void AudioBuffer::CopyLowPassToReference() {
   for (int i = 0; i < num_channels_; i++) {
     memcpy(low_pass_reference_channels_[i].data,
            low_pass_split_data(i),
-           sizeof(WebRtc_Word16) * samples_per_split_channel_);
+           sizeof(int16_t) * samples_per_split_channel_);
   }
 }
 }  // namespace webrtc
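The new CopyAndMix() complements the existing Mix(): Mix() downmixes in place and now records that via data_was_mixed_, while CopyAndMix() leaves the original capture data untouched and writes the downmix to the separate mixed_channels_ buffer. A hedged usage sketch of the distinction (illustrative only, not APM code):

#include "audio_buffer.h"  // webrtc::AudioBuffer as modified in this change.

// Obtains a mono view of the capture audio without altering what the caller
// will later get back through InterleaveTo().
void ObserveMonoWithoutModifying(webrtc::AudioBuffer* capture) {
  const int16_t* mono = capture->data(0);
  if (capture->num_channels() > 1) {
    capture->CopyAndMix(1);           // Original channels are preserved.
    mono = capture->mixed_data(0);    // The downmix lives in a side buffer.
  }
  (void)mono;  // e.g. feed to a meter such as the level estimator.
}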

View File

@@ -12,6 +12,7 @@
 #define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_BUFFER_H_
 
 #include "module_common_types.h"
+#include "scoped_ptr.h"
 #include "typedefs.h"
 
 namespace webrtc {
@@ -28,23 +29,30 @@ class AudioBuffer {
   int samples_per_channel() const;
   int samples_per_split_channel() const;
 
-  WebRtc_Word16* data(int channel) const;
-  WebRtc_Word16* low_pass_split_data(int channel) const;
-  WebRtc_Word16* high_pass_split_data(int channel) const;
-  WebRtc_Word16* mixed_low_pass_data(int channel) const;
-  WebRtc_Word16* low_pass_reference(int channel) const;
+  int16_t* data(int channel) const;
+  int16_t* low_pass_split_data(int channel) const;
+  int16_t* high_pass_split_data(int channel) const;
+  int16_t* mixed_data(int channel) const;
+  int16_t* mixed_low_pass_data(int channel) const;
+  int16_t* low_pass_reference(int channel) const;
 
-  WebRtc_Word32* analysis_filter_state1(int channel) const;
-  WebRtc_Word32* analysis_filter_state2(int channel) const;
-  WebRtc_Word32* synthesis_filter_state1(int channel) const;
-  WebRtc_Word32* synthesis_filter_state2(int channel) const;
+  int32_t* analysis_filter_state1(int channel) const;
+  int32_t* analysis_filter_state2(int channel) const;
+  int32_t* synthesis_filter_state1(int channel) const;
+  int32_t* synthesis_filter_state2(int channel) const;
 
   void set_activity(AudioFrame::VADActivity activity);
-  AudioFrame::VADActivity activity();
+  AudioFrame::VADActivity activity() const;
+
+  bool is_muted() const;
 
   void DeinterleaveFrom(AudioFrame* audioFrame);
   void InterleaveTo(AudioFrame* audioFrame) const;
+  // If |data_changed| is false, only the non-audio data members will be copied
+  // to |frame|.
+  void InterleaveTo(AudioFrame* frame, bool data_changed) const;
+
   void Mix(int num_mixed_channels);
+  void CopyAndMix(int num_mixed_channels);
   void CopyAndMixLowPass(int num_mixed_channels);
   void CopyLowPassToReference();
@@ -53,18 +61,21 @@ class AudioBuffer {
   int num_channels_;
   int num_mixed_channels_;
   int num_mixed_low_pass_channels_;
+  // Whether the original data was replaced with mixed data.
+  bool data_was_mixed_;
 
   const int samples_per_channel_;
   int samples_per_split_channel_;
   bool reference_copied_;
   AudioFrame::VADActivity activity_;
+  bool is_muted_;
 
-  WebRtc_Word16* data_;
-  // TODO(andrew): use vectors here.
-  AudioChannel* channels_;
-  SplitAudioChannel* split_channels_;
+  int16_t* data_;
+  scoped_array<AudioChannel> channels_;
+  scoped_array<SplitAudioChannel> split_channels_;
+  scoped_array<AudioChannel> mixed_channels_;
   // TODO(andrew): improve this, we don't need the full 32 kHz space here.
-  AudioChannel* mixed_low_pass_channels_;
-  AudioChannel* low_pass_reference_channels_;
+  scoped_array<AudioChannel> mixed_low_pass_channels_;
+  scoped_array<AudioChannel> low_pass_reference_channels_;
 };
 }  // namespace webrtc

View File

@@ -271,7 +271,7 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
   if (debug_file_->Open()) {
     event_msg_->set_type(audioproc::Event::STREAM);
     audioproc::Stream* msg = event_msg_->mutable_stream();
-    const size_t data_size = sizeof(WebRtc_Word16) *
+    const size_t data_size = sizeof(int16_t) *
                              frame->_payloadDataLengthInSamples *
                              frame->_audioChannel;
     msg->set_input_data(frame->_payloadData, data_size);
@@ -285,12 +285,12 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
   // TODO(ajm): experiment with mixing and AEC placement.
   if (num_output_channels_ < num_input_channels_) {
     capture_audio_->Mix(num_output_channels_);
     frame->_audioChannel = num_output_channels_;
   }
 
-  if (sample_rate_hz_ == kSampleRate32kHz) {
-    for (int i = 0; i < num_input_channels_; i++) {
+  bool data_changed = stream_data_changed();
+  if (analysis_needed(data_changed)) {
+    for (int i = 0; i < num_output_channels_; i++) {
       // Split into a low and high band.
       SplittingFilterAnalysis(capture_audio_->data(i),
                               capture_audio_->low_pass_split_data(i),
@@ -340,12 +340,7 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
     return err;
   }
 
-  //err = level_estimator_->ProcessCaptureAudio(capture_audio_);
-  //if (err != kNoError) {
-  //  return err;
-  //}
-
-  if (sample_rate_hz_ == kSampleRate32kHz) {
+  if (synthesis_needed(data_changed)) {
     for (int i = 0; i < num_output_channels_; i++) {
       // Recombine low and high bands.
       SplittingFilterSynthesis(capture_audio_->low_pass_split_data(i),
@@ -356,11 +351,17 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
     }
   }
 
-  capture_audio_->InterleaveTo(frame);
+  // The level estimator operates on the recombined data.
+  err = level_estimator_->ProcessStream(capture_audio_);
+  if (err != kNoError) {
+    return err;
+  }
+
+  capture_audio_->InterleaveTo(frame, data_changed);
 
   if (debug_file_->Open()) {
     audioproc::Stream* msg = event_msg_->mutable_stream();
-    const size_t data_size = sizeof(WebRtc_Word16) *
+    const size_t data_size = sizeof(int16_t) *
                              frame->_payloadDataLengthInSamples *
                              frame->_audioChannel;
     msg->set_output_data(frame->_payloadData, data_size);
@@ -396,7 +397,7 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
   if (debug_file_->Open()) {
     event_msg_->set_type(audioproc::Event::REVERSE_STREAM);
     audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream();
-    const size_t data_size = sizeof(WebRtc_Word16) *
+    const size_t data_size = sizeof(int16_t) *
                              frame->_payloadDataLengthInSamples *
                              frame->_audioChannel;
     msg->set_data(frame->_payloadData, data_size);
@@ -436,11 +437,6 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
     return err;
   }
 
-  //err = level_estimator_->AnalyzeReverseStream(render_audio_);
-  //if (err != kNoError) {
-  //  return err;
-  //}
-
   was_stream_delay_set_ = false;
   return err;  // TODO(ajm): this is for returning warnings; necessary?
 }
@@ -648,4 +644,44 @@ int AudioProcessingImpl::WriteInitMessage() {
   return kNoError;
 }
bool AudioProcessingImpl::stream_data_changed() const {
int enabled_count = 0;
std::list<ProcessingComponent*>::const_iterator it;
for (it = component_list_.begin(); it != component_list_.end(); it++) {
if ((*it)->is_component_enabled()) {
enabled_count++;
}
}
// Data is unchanged if no components are enabled, or if only level_estimator_
// or voice_detection_ is enabled.
if (enabled_count == 0) {
return false;
} else if (enabled_count == 1) {
if (level_estimator_->is_enabled() || voice_detection_->is_enabled()) {
return false;
}
} else if (enabled_count == 2) {
if (level_estimator_->is_enabled() && voice_detection_->is_enabled()) {
return false;
}
}
return true;
}
bool AudioProcessingImpl::synthesis_needed(bool stream_data_changed) const {
return (stream_data_changed && sample_rate_hz_ == kSampleRate32kHz);
}
bool AudioProcessingImpl::analysis_needed(bool stream_data_changed) const {
if (!stream_data_changed && !voice_detection_->is_enabled()) {
// Only level_estimator_ is enabled.
return false;
} else if (sample_rate_hz_ == kSampleRate32kHz) {
// Something besides level_estimator_ is enabled, and we have super-wb.
return true;
}
return false;
}
}  // namespace webrtc
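In plain terms, the helpers above (stream_data_changed(), analysis_needed() and synthesis_needed()) let ProcessStream() skip the band-split analysis, the synthesis and the copy back into the frame whenever the only enabled components never modify the audio: the level estimator and the voice detector. A standalone restatement of that rule, with hypothetical parameter names (illustrative only):

// True when some component that actually rewrites the capture audio is enabled.
bool StreamDataWouldChange(bool aec, bool aecm, bool agc, bool high_pass,
                           bool ns, bool level_estimator, bool vad) {
  (void)level_estimator;  // Never modifies the audio samples.
  (void)vad;              // Only sets the frame's VAD activity flag.
  return aec || aecm || agc || high_pass || ns;
}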

View File

@@ -81,6 +81,9 @@ class AudioProcessingImpl : public AudioProcessing {
  private:
   int WriteMessageToDebugFile();
   int WriteInitMessage();
+  bool stream_data_changed() const;
+  bool synthesis_needed(bool stream_data_changed) const;
+  bool analysis_needed(bool stream_data_changed) const;
 
   int id_;

View File

@@ -496,27 +496,23 @@ class HighPassFilter {
 };
 
 // An estimation component used to retrieve level metrics.
-// NOTE: currently unavailable. All methods return errors.
 class LevelEstimator {
  public:
   virtual int Enable(bool enable) = 0;
   virtual bool is_enabled() const = 0;
 
-  // The metrics are reported in dBFs calculated as:
-  //   Level = 10log_10(P_s / P_max) [dBFs], where
-  //   P_s is the signal power and P_max is the maximum possible (or peak)
-  //   power. With 16-bit signals, P_max = (2^15)^2.
-  struct Metrics {
-    AudioProcessing::Statistic signal;  // Overall signal level.
-    AudioProcessing::Statistic speech;  // Speech level.
-    AudioProcessing::Statistic noise;   // Noise level.
-  };
-
-  virtual int GetMetrics(Metrics* metrics, Metrics* reverse_metrics) = 0;
-
-  //virtual int enable_noise_warning(bool enable) = 0;
-  //bool is_noise_warning_enabled() const = 0;
-  //virtual bool stream_has_high_noise() const = 0;
+  // Returns the root mean square (RMS) level in dBFs (decibels from digital
+  // full-scale), or alternately dBov. It is computed over all primary stream
+  // frames since the last call to RMS(). The returned value is positive but
+  // should be interpreted as negative. It is constrained to [0, 127].
+  //
+  // The computation follows:
+  // http://tools.ietf.org/html/draft-ietf-avtext-client-to-mixer-audio-level-05
+  // with the intent that it can provide the RTP audio level indication.
+  //
+  // Frames passed to ProcessStream() with an |_energy| of zero are considered
+  // to have been muted. The RMS of the frame will be interpreted as -127.
+  virtual int RMS() = 0;
 
  protected:
   virtual ~LevelEstimator() {};
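Since RMS() is intended to drive the client-to-mixer audio level indication, here is a hedged sketch of how a sender might pack the returned value into the one-byte header extension described in that draft (illustrative only, not WebRTC code; PackAudioLevel is a hypothetical helper):

#include <stdint.h>

// The extension payload carries a voice-activity bit in the MSB and the level
// as -dBov in the low 7 bits, so a clamped RMS() value can be used directly.
uint8_t PackAudioLevel(int rms_dbov, bool voice_activity) {
  if (rms_dbov < 0) rms_dbov = 0;
  if (rms_dbov > 127) rms_dbov = 127;
  return static_cast<uint8_t>((voice_activity ? 0x80 : 0x00) | rms_dbov);
}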

View File

@@ -10,73 +10,78 @@
 #include "level_estimator_impl.h"
 
-#include <cassert>
-#include <cstring>
+#include <assert.h>
+#include <math.h>
+#include <string.h>
 
-#include "critical_section_wrapper.h"
 #include "audio_processing_impl.h"
 #include "audio_buffer.h"
+#include "critical_section_wrapper.h"
 
-// TODO(ajm): implement the underlying level estimator component.
 namespace webrtc {
-
-typedef void Handle;
-
 namespace {
-/*int EstimateLevel(AudioBuffer* audio, Handle* my_handle) {
-  assert(audio->samples_per_split_channel() <= 160);
 
-  WebRtc_Word16* mixed_data = audio->low_pass_split_data(0);
-  if (audio->num_channels() > 1) {
-    audio->CopyAndMixLowPass(1);
-    mixed_data = audio->mixed_low_pass_data(0);
-  }
+const double kMaxSquaredLevel = 32768.0 * 32768.0;
 
-  int err = UpdateLvlEst(my_handle,
-                         mixed_data,
-                         audio->samples_per_split_channel());
-  if (err != AudioProcessing::kNoError) {
-    return GetHandleError(my_handle);
-  }
+class Level {
+ public:
+  static const int kMinLevel = 127;
 
-  return AudioProcessing::kNoError;
-}
+  Level()
+    : sum_square_(0.0),
+      sample_count_(0) {}
+  ~Level() {}
 
-int GetMetricsLocal(Handle* my_handle, LevelEstimator::Metrics* metrics) {
-  level_t levels;
-  memset(&levels, 0, sizeof(levels));
+  void Init() {
+    sum_square_ = 0.0;
+    sample_count_ = 0;
+  }
 
-  int err = ExportLevels(my_handle, &levels, 2);
-  if (err != AudioProcessing::kNoError) {
-    return err;
-  }
-  metrics->signal.instant = levels.instant;
-  metrics->signal.average = levels.average;
-  metrics->signal.maximum = levels.max;
-  metrics->signal.minimum = levels.min;
+  void Process(int16_t* data, int length) {
+    assert(data != NULL);
+    assert(length > 0);
+    sum_square_ += SumSquare(data, length);
+    sample_count_ += length;
+  }
 
-  err = ExportLevels(my_handle, &levels, 1);
-  if (err != AudioProcessing::kNoError) {
-    return err;
-  }
-  metrics->speech.instant = levels.instant;
-  metrics->speech.average = levels.average;
-  metrics->speech.maximum = levels.max;
-  metrics->speech.minimum = levels.min;
+  void ProcessMuted(int length) {
+    assert(length > 0);
+    sample_count_ += length;
+  }
 
-  err = ExportLevels(my_handle, &levels, 0);
-  if (err != AudioProcessing::kNoError) {
-    return err;
-  }
-  metrics->noise.instant = levels.instant;
-  metrics->noise.average = levels.average;
-  metrics->noise.maximum = levels.max;
-  metrics->noise.minimum = levels.min;
+  int RMS() {
+    if (sample_count_ == 0 || sum_square_ == 0.0) {
+      Init();
+      return kMinLevel;
+    }
 
-  return AudioProcessing::kNoError;
-}*/
+    // Normalize by the max level.
+    double rms = sum_square_ / (sample_count_ * kMaxSquaredLevel);
+    // 20log_10(x^0.5) = 10log_10(x)
+    rms = 10 * log10(rms);
+    if (rms > 0)
+      rms = 0;
+    else if (rms < -kMinLevel)
+      rms = -kMinLevel;
+
+    rms = -rms;
+    Init();
+    return static_cast<int>(rms + 0.5);
+  }
+
+ private:
+  static double SumSquare(int16_t* data, int length) {
+    double sum_square = 0.0;
+    for (int i = 0; i < length; ++i) {
+      double data_d = static_cast<double>(data[i]);
+      sum_square += data_d * data_d;
+    }
+    return sum_square;
+  }
+
+  double sum_square_;
+  int sample_count_;
+};
 }  // namespace
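As a sanity check on the arithmetic in Level::RMS() above, here is a standalone sketch that reproduces the mapping for a constant-amplitude signal; the printed values mirror the expectations added to the unittest later in this change (illustrative only, not APM code):

#include <math.h>
#include <stdio.h>

// For a constant amplitude A the level is -10*log10(A^2 / 32768^2), rounded
// and clamped to [0, 127]: 32767 -> 0, 30000 -> 1, 10000 -> 10, 10 -> 70.
int ExpectedRms(double amplitude) {
  double rms = 10.0 * log10((amplitude * amplitude) / (32768.0 * 32768.0));
  if (rms > 0.0) rms = 0.0;
  if (rms < -127.0) rms = -127.0;
  return static_cast<int>(-rms + 0.5);
}

int main() {
  printf("%d %d %d %d\n", ExpectedRms(32767.0), ExpectedRms(30000.0),
         ExpectedRms(10000.0), ExpectedRms(10.0));
  return 0;
}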
 LevelEstimatorImpl::LevelEstimatorImpl(const AudioProcessingImpl* apm)
@@ -85,52 +90,44 @@ LevelEstimatorImpl::LevelEstimatorImpl(const AudioProcessingImpl* apm)
 
 LevelEstimatorImpl::~LevelEstimatorImpl() {}
 
-int LevelEstimatorImpl::AnalyzeReverseStream(AudioBuffer* /*audio*/) {
-  return apm_->kUnsupportedComponentError;
-  /*if (!is_component_enabled()) {
-    return apm_->kNoError;
-  }
-
-  return EstimateLevel(audio, static_cast<Handle*>(handle(1)));*/
-}
-
-int LevelEstimatorImpl::ProcessCaptureAudio(AudioBuffer* /*audio*/) {
-  return apm_->kUnsupportedComponentError;
-  /*if (!is_component_enabled()) {
+int LevelEstimatorImpl::ProcessStream(AudioBuffer* audio) {
+  if (!is_component_enabled()) {
     return apm_->kNoError;
   }
 
-  return EstimateLevel(audio, static_cast<Handle*>(handle(0)));*/
-}
+  Level* level = static_cast<Level*>(handle(0));
+  if (audio->is_muted()) {
+    level->ProcessMuted(audio->samples_per_channel());
+    return apm_->kNoError;
+  }
 
-int LevelEstimatorImpl::Enable(bool /*enable*/) {
+  int16_t* mixed_data = audio->data(0);
+  if (audio->num_channels() > 1) {
+    audio->CopyAndMix(1);
+    mixed_data = audio->mixed_data(0);
+  }
+
+  level->Process(mixed_data, audio->samples_per_channel());
+
+  return apm_->kNoError;
+}
+
+int LevelEstimatorImpl::Enable(bool enable) {
   CriticalSectionScoped crit_scoped(*apm_->crit());
-  return apm_->kUnsupportedComponentError;
-  //return EnableComponent(enable);
+  return EnableComponent(enable);
 }
 
 bool LevelEstimatorImpl::is_enabled() const {
   return is_component_enabled();
 }
 
-int LevelEstimatorImpl::GetMetrics(LevelEstimator::Metrics* /*metrics*/,
-                                   LevelEstimator::Metrics* /*reverse_metrics*/) {
-  return apm_->kUnsupportedComponentError;
-  /*if (!is_component_enabled()) {
+int LevelEstimatorImpl::RMS() {
+  if (!is_component_enabled()) {
     return apm_->kNotEnabledError;
   }
 
-  int err = GetMetricsLocal(static_cast<Handle*>(handle(0)), metrics);
-  if (err != apm_->kNoError) {
-    return err;
-  }
-
-  err = GetMetricsLocal(static_cast<Handle*>(handle(1)), reverse_metrics);
-  if (err != apm_->kNoError) {
-    return err;
-  }
-
-  return apm_->kNoError;*/
+  Level* level = static_cast<Level*>(handle(0));
+  return level->RMS();
 }
 
 int LevelEstimatorImpl::get_version(char* version,
@@ -141,37 +138,30 @@ int LevelEstimatorImpl::get_version(char* version,
 }
 
 void* LevelEstimatorImpl::CreateHandle() const {
-  Handle* handle = NULL;
-  /*if (CreateLvlEst(&handle) != apm_->kNoError) {
-    handle = NULL;
-  } else {
-    assert(handle != NULL);
-  }*/
-
-  return handle;
+  return new Level;
 }
 
-int LevelEstimatorImpl::DestroyHandle(void* /*handle*/) const {
-  return apm_->kUnsupportedComponentError;
-  //return FreeLvlEst(static_cast<Handle*>(handle));
+int LevelEstimatorImpl::DestroyHandle(void* handle) const {
+  assert(handle != NULL);
+  Level* level = static_cast<Level*>(handle);
+  delete level;
+  return apm_->kNoError;
 }
 
-int LevelEstimatorImpl::InitializeHandle(void* /*handle*/) const {
-  return apm_->kUnsupportedComponentError;
-  /*const double kIntervalSeconds = 1.5;
-  return InitLvlEst(static_cast<Handle*>(handle),
-                    apm_->sample_rate_hz(),
-                    kIntervalSeconds);*/
+int LevelEstimatorImpl::InitializeHandle(void* handle) const {
+  assert(handle != NULL);
+  Level* level = static_cast<Level*>(handle);
+  level->Init();
+  return apm_->kNoError;
 }
 
 int LevelEstimatorImpl::ConfigureHandle(void* /*handle*/) const {
-  return apm_->kUnsupportedComponentError;
-  //return apm_->kNoError;
+  return apm_->kNoError;
 }
 
 int LevelEstimatorImpl::num_handles_required() const {
-  return apm_->kUnsupportedComponentError;
-  //return 2;
+  return 1;
 }
 
 int LevelEstimatorImpl::GetHandleError(void* handle) const {

View File

@@ -24,8 +24,7 @@ class LevelEstimatorImpl : public LevelEstimator,
   explicit LevelEstimatorImpl(const AudioProcessingImpl* apm);
   virtual ~LevelEstimatorImpl();
 
-  int AnalyzeReverseStream(AudioBuffer* audio);
-  int ProcessCaptureAudio(AudioBuffer* audio);
+  int ProcessStream(AudioBuffer* audio);
 
   // LevelEstimator implementation.
   virtual bool is_enabled() const;
@@ -36,7 +35,7 @@ class LevelEstimatorImpl : public LevelEstimator,
  private:
   // LevelEstimator implementation.
   virtual int Enable(bool enable);
-  virtual int GetMetrics(Metrics* metrics, Metrics* reverse_metrics);
+  virtual int RMS();
 
   // ProcessingComponent implementation.
   virtual void* CreateHandle() const;

View File

@@ -18,16 +18,6 @@
 namespace webrtc {
 class AudioProcessingImpl;
 
-/*template <class T>
-class ComponentHandle {
-  public:
-    ComponentHandle();
-    virtual ~ComponentHandle();
-
-    virtual int Create() = 0;
-    virtual T* ptr() const = 0;
-};*/
-
 class ProcessingComponent {
  public:
   explicit ProcessingComponent(const AudioProcessingImpl* apm);
@@ -37,10 +27,11 @@ class ProcessingComponent {
   virtual int Destroy();
   virtual int get_version(char* version, int version_len_bytes) const = 0;
 
+  bool is_component_enabled() const;
+
  protected:
   virtual int Configure();
   int EnableComponent(bool enable);
-  bool is_component_enabled() const;
   void* handle(int index) const;
   int num_handles() const;

View File

@@ -117,6 +117,8 @@ void usage() {
   printf("  --ns_very_high\n");
   printf("\n  -vad     Voice activity detection\n");
   printf("  --vad_out_file FILE\n");
+  printf("\n Level metrics (enabled by default)\n");
+  printf("  --no_level_metrics\n");
   printf("\n");
   printf("Modifiers:\n");
   printf("  --noasm            Disable SSE optimization.\n");
@@ -171,6 +173,7 @@ void void_main(int argc, char* argv[]) {
   int extra_delay_ms = 0;
   //bool interleaved = true;
 
+  ASSERT_EQ(apm->kNoError, apm->level_estimator()->Enable(true));
   for (int i = 1; i < argc; i++) {
     if (strcmp(argv[i], "-pb") == 0) {
       i++;
@@ -250,6 +253,9 @@ void void_main(int argc, char* argv[]) {
       ASSERT_EQ(apm->kNoError,
                 apm->echo_cancellation()->enable_delay_logging(false));
 
+    } else if (strcmp(argv[i], "--no_level_metrics") == 0) {
+      ASSERT_EQ(apm->kNoError, apm->level_estimator()->Enable(false));
+
     } else if (strcmp(argv[i], "-aecm") == 0) {
       ASSERT_EQ(apm->kNoError, apm->echo_control_mobile()->Enable(true));
@@ -454,16 +460,16 @@ void void_main(int argc, char* argv[]) {
   ASSERT_TRUE(NULL != out_file) << "Unable to open output audio file "
                                 << out_filename;
 
-  int near_size_samples = 0;
+  int near_size_bytes = 0;
   if (pb_file) {
     struct stat st;
    stat(pb_filename, &st);
     // Crude estimate, but should be good enough.
-    near_size_samples = st.st_size / 3 / sizeof(int16_t);
+    near_size_bytes = st.st_size / 3;
   } else {
     struct stat st;
     stat(near_filename, &st);
-    near_size_samples = st.st_size / sizeof(int16_t);
+    near_size_bytes = st.st_size;
   }
 
   if (apm->voice_detection()->is_enabled()) {
@@ -500,14 +506,11 @@ void void_main(int argc, char* argv[]) {
   size_t read_count = 0;
   int reverse_count = 0;
   int primary_count = 0;
-  int near_read_samples = 0;
+  int near_read_bytes = 0;
   TickInterval acc_ticks;
 
   AudioFrame far_frame;
-  far_frame._frequencyInHz = sample_rate_hz;
-
   AudioFrame near_frame;
-  near_frame._frequencyInHz = sample_rate_hz;
 
   int delay_ms = 0;
   int drift_samples = 0;
@@ -556,14 +559,19 @@ void void_main(int argc, char* argv[]) {
         samples_per_channel = msg.sample_rate() / 100;
         far_frame._frequencyInHz = msg.sample_rate();
-        far_frame._payloadDataLengthInSamples =
-            msg.num_reverse_channels() * samples_per_channel;
+        far_frame._payloadDataLengthInSamples = samples_per_channel;
+        far_frame._audioChannel = msg.num_reverse_channels();
         near_frame._frequencyInHz = msg.sample_rate();
+        near_frame._payloadDataLengthInSamples = samples_per_channel;
 
         if (verbose) {
           printf("Init at frame: %d (primary), %d (reverse)\n",
              primary_count, reverse_count);
           printf("  Sample rate: %d Hz\n", sample_rate_hz);
+          printf("  Primary channels: %d (in), %d (out)\n",
+                 msg.num_input_channels(),
+                 msg.num_output_channels());
+          printf("  Reverse channels: %d \n", msg.num_reverse_channels());
         }
 
       } else if (event_msg.type() == Event::REVERSE_STREAM) {
@@ -572,8 +580,8 @@ void void_main(int argc, char* argv[]) {
         reverse_count++;
 
         ASSERT_TRUE(msg.has_data());
-        ASSERT_EQ(sizeof(int16_t) * far_frame._payloadDataLengthInSamples,
-            msg.data().size());
+        ASSERT_EQ(sizeof(int16_t) * samples_per_channel *
+            far_frame._audioChannel, msg.data().size());
         memcpy(far_frame._payloadData, msg.data().data(), msg.data().size());
 
         if (perf_testing) {
@@ -600,21 +608,20 @@ void void_main(int argc, char* argv[]) {
         const Stream msg = event_msg.stream();
         primary_count++;
 
+        // ProcessStream could have changed this for the output frame.
         near_frame._audioChannel = apm->num_input_channels();
-        near_frame._payloadDataLengthInSamples =
-            apm->num_input_channels() * samples_per_channel;
 
         ASSERT_TRUE(msg.has_input_data());
-        ASSERT_EQ(sizeof(int16_t) * near_frame._payloadDataLengthInSamples,
-            msg.input_data().size());
+        ASSERT_EQ(sizeof(int16_t) * samples_per_channel *
+            near_frame._audioChannel, msg.input_data().size());
         memcpy(near_frame._payloadData,
               msg.input_data().data(),
               msg.input_data().size());
 
-        near_read_samples += near_frame._payloadDataLengthInSamples;
+        near_read_bytes += msg.input_data().size();
        if (progress && primary_count % 100 == 0) {
          printf("%.0f%% complete\r",
-              (near_read_samples * 100.0) / near_size_samples);
+              (near_read_bytes * 100.0) / near_size_bytes);
          fflush(stdout);
        }
 
@@ -635,6 +642,7 @@ void void_main(int argc, char* argv[]) {
        }
        ASSERT_TRUE(err == apm->kNoError ||
                    err == apm->kBadStreamParameterWarning);
+        ASSERT_TRUE(near_frame._audioChannel == apm->num_output_channels());
 
        capture_level = apm->gain_control()->stream_analog_level();
@@ -663,10 +671,10 @@ void void_main(int argc, char* argv[]) {
          }
        }
 
-        ASSERT_EQ(near_frame._payloadDataLengthInSamples,
-                  fwrite(near_frame._payloadData,
-                         sizeof(int16_t),
-                         near_frame._payloadDataLengthInSamples,
-                         out_file));
+        size_t size = samples_per_channel * near_frame._audioChannel;
+        ASSERT_EQ(size, fwrite(near_frame._payloadData,
+                               sizeof(int16_t),
+                               size,
+                               out_file));
      }
    }
@@ -704,6 +712,12 @@ void void_main(int argc, char* argv[]) {
      }
    }
 
+    far_frame._frequencyInHz = sample_rate_hz;
+    far_frame._payloadDataLengthInSamples = samples_per_channel;
+    far_frame._audioChannel = num_render_channels;
+    near_frame._frequencyInHz = sample_rate_hz;
+    near_frame._payloadDataLengthInSamples = samples_per_channel;
+
    if (event == kInitializeEvent || event == kResetEventDeprecated) {
      ASSERT_EQ(1u,
          fread(&sample_rate_hz, sizeof(sample_rate_hz), 1, event_file));
@@ -723,7 +737,10 @@ void void_main(int argc, char* argv[]) {
                device_sample_rate_hz));
 
      far_frame._frequencyInHz = sample_rate_hz;
+      far_frame._payloadDataLengthInSamples = samples_per_channel;
+      far_frame._audioChannel = num_render_channels;
      near_frame._frequencyInHz = sample_rate_hz;
+      near_frame._payloadDataLengthInSamples = samples_per_channel;
 
      if (verbose) {
        printf("Init at frame: %d (primary), %d (reverse)\n",
@@ -733,26 +750,23 @@ void void_main(int argc, char* argv[]) {
    } else if (event == kRenderEvent) {
      reverse_count++;
-      far_frame._audioChannel = num_render_channels;
-      far_frame._payloadDataLengthInSamples =
-          num_render_channels * samples_per_channel;
 
+      size_t size = samples_per_channel * num_render_channels;
      read_count = fread(far_frame._payloadData,
-                         sizeof(WebRtc_Word16),
-                         far_frame._payloadDataLengthInSamples,
+                         sizeof(int16_t),
+                         size,
                         far_file);
 
      if (simulating) {
-        if (read_count != far_frame._payloadDataLengthInSamples) {
+        if (read_count != size) {
          // Read an equal amount from the near file to avoid errors due to
          // not reaching end-of-file.
-          EXPECT_EQ(0, fseek(near_file, read_count * sizeof(WebRtc_Word16),
+          EXPECT_EQ(0, fseek(near_file, read_count * sizeof(int16_t),
                    SEEK_CUR));
          break; // This is expected.
        }
      } else {
-        ASSERT_EQ(read_count,
-                  far_frame._payloadDataLengthInSamples);
+        ASSERT_EQ(size, read_count);
      }
 
      if (perf_testing) {
@@ -777,30 +791,28 @@ void void_main(int argc, char* argv[]) {
    } else if (event == kCaptureEvent) {
      primary_count++;
      near_frame._audioChannel = num_capture_input_channels;
-      near_frame._payloadDataLengthInSamples =
-          num_capture_input_channels * samples_per_channel;
 
+      size_t size = samples_per_channel * num_capture_input_channels;
      read_count = fread(near_frame._payloadData,
-                         sizeof(WebRtc_Word16),
-                         near_frame._payloadDataLengthInSamples,
+                         sizeof(int16_t),
+                         size,
                         near_file);
 
-      near_read_samples += read_count;
+      near_read_bytes += read_count * sizeof(int16_t);
      if (progress && primary_count % 100 == 0) {
        printf("%.0f%% complete\r",
-            (near_read_samples * 100.0) / near_size_samples);
+            (near_read_bytes * 100.0) / near_size_bytes);
        fflush(stdout);
      }
 
      if (simulating) {
-        if (read_count != near_frame._payloadDataLengthInSamples) {
+        if (read_count != size) {
          break; // This is expected.
        }
 
        delay_ms = 0;
        drift_samples = 0;
      } else {
-        ASSERT_EQ(read_count,
-                  near_frame._payloadDataLengthInSamples);
+        ASSERT_EQ(size, read_count);
 
        // TODO(ajm): sizeof(delay_ms) for current files?
        ASSERT_EQ(1u,
@@ -829,6 +841,7 @@ void void_main(int argc, char* argv[]) {
      }
      ASSERT_TRUE(err == apm->kNoError ||
                  err == apm->kBadStreamParameterWarning);
+      ASSERT_TRUE(near_frame._audioChannel == apm->num_output_channels());
 
      capture_level = apm->gain_control()->stream_analog_level();
@@ -857,10 +870,10 @@ void void_main(int argc, char* argv[]) {
        }
      }
 
-      ASSERT_EQ(near_frame._payloadDataLengthInSamples,
-                fwrite(near_frame._payloadData,
-                       sizeof(WebRtc_Word16),
-                       near_frame._payloadDataLengthInSamples,
-                       out_file));
+      size = samples_per_channel * near_frame._audioChannel;
+      ASSERT_EQ(size, fwrite(near_frame._payloadData,
+                             sizeof(int16_t),
+                             size,
+                             out_file));
    }
    else {
@@ -887,6 +900,10 @@ void void_main(int argc, char* argv[]) {
  printf("\nProcessed frames: %d (primary), %d (reverse)\n",
         primary_count, reverse_count);
 
+  if (apm->level_estimator()->is_enabled()) {
+    printf("\n--Level metrics--\n");
+    printf("RMS: %d dBFS\n", -apm->level_estimator()->RMS());
+  }
  if (apm->echo_cancellation()->are_metrics_enabled()) {
    EchoCancellation::Metrics metrics;
    apm->echo_cancellation()->GetMetrics(&metrics);

View File

@@ -45,26 +45,25 @@ namespace {
 // be set to true with the command-line switch --write_output_data.
 bool write_output_data = false;
 
-class ApmEnvironment : public ::testing::Environment {
- public:
-  virtual void SetUp() {
-    Trace::CreateTrace();
-    ASSERT_EQ(0, Trace::SetTraceFile("apm_trace.txt"));
-  }
-
-  virtual void TearDown() {
-    Trace::ReturnTrace();
-  }
-};
-
 class ApmTest : public ::testing::Test {
  protected:
   ApmTest();
   virtual void SetUp();
   virtual void TearDown();
 
+  static void SetUpTestCase() {
+    Trace::CreateTrace();
+    std::string trace_filename = webrtc::test::OutputPath() +
+        "audioproc_trace.txt";
+    ASSERT_EQ(0, Trace::SetTraceFile(trace_filename.c_str()));
+  }
+
+  static void TearDownTestCase() {
+    Trace::ReturnTrace();
+  }
+
   // Path to where the resource files to be used for this test are located.
-  const std::string kResourcePath;
-  const std::string kOutputFileName;
+  const std::string resource_path;
+  const std::string output_filename;
   webrtc::AudioProcessing* apm_;
   webrtc::AudioFrame* frame_;
   webrtc::AudioFrame* revframe_;
@@ -73,12 +72,12 @@ class ApmTest : public ::testing::Test {
 };
 
 ApmTest::ApmTest()
-    : kResourcePath(webrtc::test::ProjectRootPath() +
-                    "test/data/audio_processing/"),
+    : resource_path(webrtc::test::ProjectRootPath() +
+                    "test/data/audio_processing/"),
 #if defined(WEBRTC_APM_UNIT_TEST_FIXED_PROFILE)
-      kOutputFileName(kResourcePath + "output_data_fixed.pb"),
+      output_filename(resource_path + "output_data_fixed.pb"),
 #elif defined(WEBRTC_APM_UNIT_TEST_FLOAT_PROFILE)
-      kOutputFileName(kResourcePath + "output_data_float.pb"),
+      output_filename(resource_path + "output_data_float.pb"),
 #endif
       apm_(NULL),
       frame_(NULL),
@@ -104,11 +103,11 @@ void ApmTest::SetUp() {
   revframe_->_audioChannel = 2;
   revframe_->_frequencyInHz = 32000;
 
-  std::string input_filename = kResourcePath + "aec_far.pcm";
+  std::string input_filename = resource_path + "aec_far.pcm";
   far_file_ = fopen(input_filename.c_str(), "rb");
   ASSERT_TRUE(far_file_ != NULL) << "Could not open input file " <<
       input_filename << "\n";
-  input_filename = kResourcePath + "aec_near.pcm";
+  input_filename = resource_path + "aec_near.pcm";
   near_file_ = fopen(input_filename.c_str(), "rb");
   ASSERT_TRUE(near_file_ != NULL) << "Could not open input file " <<
      input_filename << "\n";
@@ -141,13 +140,13 @@ void ApmTest::TearDown() {
   apm_ = NULL;
 }
 
-void MixStereoToMono(const WebRtc_Word16* stereo,
-                     WebRtc_Word16* mono,
-                     int num_samples) {
-  for (int i = 0; i < num_samples; i++) {
-    int int32 = (static_cast<int>(stereo[i * 2]) +
-                 static_cast<int>(stereo[i * 2 + 1])) >> 1;
-    mono[i] = static_cast<WebRtc_Word16>(int32);
+void MixStereoToMono(const int16_t* stereo,
+                     int16_t* mono,
+                     int samples_per_channel) {
+  for (int i = 0; i < samples_per_channel; i++) {
+    int32_t int32 = (static_cast<int32_t>(stereo[i * 2]) +
+                     static_cast<int32_t>(stereo[i * 2 + 1])) >> 1;
+    mono[i] = static_cast<int16_t>(int32);
   }
 }
 
@@ -161,9 +160,16 @@ T AbsValue(T a) {
   return a > 0 ? a : -a;
 }
 
-WebRtc_Word16 MaxAudioFrame(const AudioFrame& frame) {
+void SetFrameTo(AudioFrame* frame, int16_t value) {
+  for (int i = 0; i < frame->_payloadDataLengthInSamples * frame->_audioChannel;
+      ++i) {
+    frame->_payloadData[i] = value;
+  }
+}
+
+int16_t MaxAudioFrame(const AudioFrame& frame) {
   const int length = frame._payloadDataLengthInSamples * frame._audioChannel;
-  WebRtc_Word16 max = AbsValue(frame._payloadData[0]);
+  int16_t max = AbsValue(frame._payloadData[0]);
   for (int i = 1; i < length; i++) {
     max = MaxValue(max, AbsValue(frame._payloadData[i]));
   }
@@ -171,6 +177,23 @@ WebRtc_Word16 MaxAudioFrame(const AudioFrame& frame) {
   return max;
 }
 
+bool FrameDataAreEqual(const AudioFrame& frame1, const AudioFrame& frame2) {
+  if (frame1._payloadDataLengthInSamples !=
+      frame2._payloadDataLengthInSamples) {
+    return false;
+  }
+  if (frame1._audioChannel !=
+      frame2._audioChannel) {
+    return false;
+  }
+  if (memcmp(frame1._payloadData, frame2._payloadData,
+             frame1._payloadDataLengthInSamples * frame1._audioChannel *
+             sizeof(int16_t))) {
+    return false;
+  }
+  return true;
+}
+
 void TestStats(const AudioProcessing::Statistic& test,
                const webrtc::audioproc::Test::Statistic& reference) {
   EXPECT_EQ(reference.instant(), test.instant);
@@ -421,251 +444,6 @@ TEST_F(ApmTest, SampleRates) {
   }
 }
TEST_F(ApmTest, Process) {
GOOGLE_PROTOBUF_VERIFY_VERSION;
webrtc::audioproc::OutputData output_data;
if (!write_output_data) {
ReadMessageLiteFromFile(kOutputFileName, &output_data);
} else {
// We don't have a file; add the required tests to the protobuf.
// TODO(ajm): vary the output channels as well?
const int channels[] = {1, 2};
const size_t channels_size = sizeof(channels) / sizeof(*channels);
#if defined(WEBRTC_APM_UNIT_TEST_FIXED_PROFILE)
// AECM doesn't support super-wb.
const int sample_rates[] = {8000, 16000};
#elif defined(WEBRTC_APM_UNIT_TEST_FLOAT_PROFILE)
const int sample_rates[] = {8000, 16000, 32000};
#endif
const size_t sample_rates_size = sizeof(sample_rates) / sizeof(*sample_rates);
for (size_t i = 0; i < channels_size; i++) {
for (size_t j = 0; j < channels_size; j++) {
for (size_t k = 0; k < sample_rates_size; k++) {
webrtc::audioproc::Test* test = output_data.add_test();
test->set_num_reverse_channels(channels[i]);
test->set_num_input_channels(channels[j]);
test->set_num_output_channels(channels[j]);
test->set_sample_rate(sample_rates[k]);
}
}
}
}
#if defined(WEBRTC_APM_UNIT_TEST_FIXED_PROFILE)
EXPECT_EQ(apm_->kNoError, apm_->set_sample_rate_hz(16000));
EXPECT_EQ(apm_->kNoError, apm_->echo_control_mobile()->Enable(true));
EXPECT_EQ(apm_->kNoError,
apm_->gain_control()->set_mode(GainControl::kAdaptiveDigital));
EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true));
#elif defined(WEBRTC_APM_UNIT_TEST_FLOAT_PROFILE)
EXPECT_EQ(apm_->kNoError,
apm_->echo_cancellation()->enable_drift_compensation(true));
EXPECT_EQ(apm_->kNoError,
apm_->echo_cancellation()->enable_metrics(true));
EXPECT_EQ(apm_->kNoError,
apm_->echo_cancellation()->enable_delay_logging(true));
EXPECT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(true));
EXPECT_EQ(apm_->kNoError,
apm_->gain_control()->set_mode(GainControl::kAdaptiveAnalog));
EXPECT_EQ(apm_->kNoError,
apm_->gain_control()->set_analog_level_limits(0, 255));
EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true));
#endif
EXPECT_EQ(apm_->kNoError,
apm_->high_pass_filter()->Enable(true));
//EXPECT_EQ(apm_->kNoError,
// apm_->level_estimator()->Enable(true));
EXPECT_EQ(apm_->kNoError,
apm_->noise_suppression()->Enable(true));
EXPECT_EQ(apm_->kNoError,
apm_->voice_detection()->Enable(true));
for (int i = 0; i < output_data.test_size(); i++) {
printf("Running test %d of %d...\n", i + 1, output_data.test_size());
webrtc::audioproc::Test* test = output_data.mutable_test(i);
const int num_samples = test->sample_rate() / 100;
revframe_->_payloadDataLengthInSamples = num_samples;
revframe_->_audioChannel = test->num_reverse_channels();
revframe_->_frequencyInHz = test->sample_rate();
frame_->_payloadDataLengthInSamples = num_samples;
frame_->_audioChannel = test->num_input_channels();
frame_->_frequencyInHz = test->sample_rate();
EXPECT_EQ(apm_->kNoError, apm_->Initialize());
ASSERT_EQ(apm_->kNoError, apm_->set_sample_rate_hz(test->sample_rate()));
ASSERT_EQ(apm_->kNoError, apm_->set_num_channels(frame_->_audioChannel,
frame_->_audioChannel));
ASSERT_EQ(apm_->kNoError,
apm_->set_num_reverse_channels(revframe_->_audioChannel));
int frame_count = 0;
int has_echo_count = 0;
int has_voice_count = 0;
int is_saturated_count = 0;
int analog_level = 127;
int analog_level_average = 0;
int max_output_average = 0;
while (1) {
WebRtc_Word16 temp_data[640];
// Read far-end frame
size_t read_count = fread(temp_data,
sizeof(WebRtc_Word16),
num_samples * 2,
far_file_);
if (read_count != static_cast<size_t>(num_samples * 2)) {
// Check that the file really ended.
ASSERT_NE(0, feof(far_file_));
break; // This is expected.
}
if (revframe_->_audioChannel == 1) {
MixStereoToMono(temp_data, revframe_->_payloadData,
revframe_->_payloadDataLengthInSamples);
} else {
memcpy(revframe_->_payloadData,
&temp_data[0],
sizeof(WebRtc_Word16) * read_count);
}
EXPECT_EQ(apm_->kNoError,
apm_->AnalyzeReverseStream(revframe_));
EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(0));
EXPECT_EQ(apm_->kNoError,
apm_->echo_cancellation()->set_stream_drift_samples(0));
EXPECT_EQ(apm_->kNoError,
apm_->gain_control()->set_stream_analog_level(analog_level));
// Read near-end frame
read_count = fread(temp_data,
sizeof(WebRtc_Word16),
num_samples * 2,
near_file_);
if (read_count != static_cast<size_t>(num_samples * 2)) {
// Check that the file really ended.
ASSERT_NE(0, feof(near_file_));
break; // This is expected.
}
if (frame_->_audioChannel == 1) {
MixStereoToMono(temp_data, frame_->_payloadData, num_samples);
} else {
memcpy(frame_->_payloadData,
&temp_data[0],
sizeof(WebRtc_Word16) * read_count);
}
frame_->_vadActivity = AudioFrame::kVadUnknown;
EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
max_output_average += MaxAudioFrame(*frame_);
if (apm_->echo_cancellation()->stream_has_echo()) {
has_echo_count++;
}
analog_level = apm_->gain_control()->stream_analog_level();
analog_level_average += analog_level;
if (apm_->gain_control()->stream_is_saturated()) {
is_saturated_count++;
}
if (apm_->voice_detection()->stream_has_voice()) {
has_voice_count++;
EXPECT_EQ(AudioFrame::kVadActive, frame_->_vadActivity);
} else {
EXPECT_EQ(AudioFrame::kVadPassive, frame_->_vadActivity);
}
frame_count++;
}
max_output_average /= frame_count;
analog_level_average /= frame_count;
//LevelEstimator::Metrics far_metrics;
//LevelEstimator::Metrics near_metrics;
//EXPECT_EQ(apm_->kNoError,
// apm_->level_estimator()->GetMetrics(&near_metrics,
#if defined(WEBRTC_APM_UNIT_TEST_FLOAT_PROFILE)
EchoCancellation::Metrics echo_metrics;
EXPECT_EQ(apm_->kNoError,
apm_->echo_cancellation()->GetMetrics(&echo_metrics));
int median = 0;
int std = 0;
EXPECT_EQ(apm_->kNoError,
apm_->echo_cancellation()->GetDelayMetrics(&median, &std));
#endif
if (!write_output_data) {
EXPECT_EQ(test->has_echo_count(), has_echo_count);
EXPECT_EQ(test->has_voice_count(), has_voice_count);
EXPECT_EQ(test->is_saturated_count(), is_saturated_count);
EXPECT_EQ(test->analog_level_average(), analog_level_average);
EXPECT_EQ(test->max_output_average(), max_output_average);
#if defined(WEBRTC_APM_UNIT_TEST_FLOAT_PROFILE)
webrtc::audioproc::Test::EchoMetrics reference =
test->echo_metrics();
TestStats(echo_metrics.residual_echo_return_loss,
reference.residual_echo_return_loss());
TestStats(echo_metrics.echo_return_loss,
reference.echo_return_loss());
TestStats(echo_metrics.echo_return_loss_enhancement,
reference.echo_return_loss_enhancement());
TestStats(echo_metrics.a_nlp,
reference.a_nlp());
webrtc::audioproc::Test::DelayMetrics reference_delay =
test->delay_metrics();
EXPECT_EQ(median, reference_delay.median());
EXPECT_EQ(std, reference_delay.std());
#endif
} else {
test->set_has_echo_count(has_echo_count);
test->set_has_voice_count(has_voice_count);
test->set_is_saturated_count(is_saturated_count);
test->set_analog_level_average(analog_level_average);
test->set_max_output_average(max_output_average);
#if defined(WEBRTC_APM_UNIT_TEST_FLOAT_PROFILE)
webrtc::audioproc::Test::EchoMetrics* message =
test->mutable_echo_metrics();
WriteStatsMessage(echo_metrics.residual_echo_return_loss,
message->mutable_residual_echo_return_loss());
WriteStatsMessage(echo_metrics.echo_return_loss,
message->mutable_echo_return_loss());
WriteStatsMessage(echo_metrics.echo_return_loss_enhancement,
message->mutable_echo_return_loss_enhancement());
WriteStatsMessage(echo_metrics.a_nlp,
message->mutable_a_nlp());
webrtc::audioproc::Test::DelayMetrics* message_delay =
test->mutable_delay_metrics();
message_delay->set_median(median);
message_delay->set_std(std);
#endif
}
rewind(far_file_);
rewind(near_file_);
}
if (write_output_data) {
WriteMessageLiteToFile(kOutputFileName, output_data);
}
}
TEST_F(ApmTest, EchoCancellation) { TEST_F(ApmTest, EchoCancellation) {
EXPECT_EQ(apm_->kNoError, EXPECT_EQ(apm_->kNoError,
@ -948,13 +726,78 @@ TEST_F(ApmTest, HighPassFilter) {
} }
TEST_F(ApmTest, LevelEstimator) { TEST_F(ApmTest, LevelEstimator) {
// Turing Level estimator on/off // Turning level estimator on/off
EXPECT_EQ(apm_->kUnsupportedComponentError, EXPECT_EQ(apm_->kNoError, apm_->level_estimator()->Enable(false));
apm_->level_estimator()->Enable(true));
EXPECT_FALSE(apm_->level_estimator()->is_enabled());
EXPECT_EQ(apm_->kUnsupportedComponentError,
apm_->level_estimator()->Enable(false));
EXPECT_FALSE(apm_->level_estimator()->is_enabled()); EXPECT_FALSE(apm_->level_estimator()->is_enabled());
EXPECT_EQ(apm_->kNotEnabledError, apm_->level_estimator()->RMS());
EXPECT_EQ(apm_->kNoError, apm_->level_estimator()->Enable(true));
EXPECT_TRUE(apm_->level_estimator()->is_enabled());
// Run this test in wideband; in super-wb, the splitting filter distorts the
// audio enough to cause deviation from the expectation for small values.
EXPECT_EQ(apm_->kNoError, apm_->set_sample_rate_hz(16000));
frame_->_payloadDataLengthInSamples = 160;
frame_->_audioChannel = 2;
frame_->_frequencyInHz = 16000;
// Min value if no frames have been processed.
EXPECT_EQ(127, apm_->level_estimator()->RMS());
// Min value on frames of all zeros.
SetFrameTo(frame_, 0);
EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
EXPECT_EQ(127, apm_->level_estimator()->RMS());
// Try a few RMS values.
// (These also test that the value resets after retrieving it.)
SetFrameTo(frame_, 32767);
EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
EXPECT_EQ(0, apm_->level_estimator()->RMS());
SetFrameTo(frame_, 30000);
EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
EXPECT_EQ(1, apm_->level_estimator()->RMS());
SetFrameTo(frame_, 10000);
EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
EXPECT_EQ(10, apm_->level_estimator()->RMS());
SetFrameTo(frame_, 10);
EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
EXPECT_EQ(70, apm_->level_estimator()->RMS());
// Min value if _energy == 0.
SetFrameTo(frame_, 10000);
uint32_t energy = frame_->_energy; // Save default to restore below.
frame_->_energy = 0;
EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
EXPECT_EQ(127, apm_->level_estimator()->RMS());
frame_->_energy = energy;
// Verify reset after enable/disable.
SetFrameTo(frame_, 32767);
EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
EXPECT_EQ(apm_->kNoError, apm_->level_estimator()->Enable(false));
EXPECT_EQ(apm_->kNoError, apm_->level_estimator()->Enable(true));
SetFrameTo(frame_, 1);
EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
EXPECT_EQ(90, apm_->level_estimator()->RMS());
// Verify reset after initialize.
SetFrameTo(frame_, 32767);
EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
EXPECT_EQ(apm_->kNoError, apm_->Initialize());
SetFrameTo(frame_, 1);
EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
EXPECT_EQ(90, apm_->level_estimator()->RMS());
} }
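// The expected values above are consistent with reporting the RMS level in dB
// below full scale (32768), rounded and clamped to [0, 127], with 127
// reserved for silent or zero-energy input. A minimal sketch of that mapping
// (an illustration only, not the LevelEstimator implementation); the frames
// used above are constant, so their RMS equals the sample value:
#include <cmath>

static int ExpectedRmsLevel(double rms) {
  if (rms <= 0.0) {
    return 127;  // Minimum level for silence or zero energy.
  }
  int level = static_cast<int>(
      std::floor(-20.0 * std::log10(rms / 32768.0) + 0.5));
  if (level < 0) level = 0;
  if (level > 127) level = 127;
  return level;
}
// ExpectedRmsLevel(32767) == 0, ExpectedRmsLevel(30000) == 1,
// ExpectedRmsLevel(10000) == 10, ExpectedRmsLevel(10) == 70 and
// ExpectedRmsLevel(1) == 90, matching the EXPECT_EQ values above.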
TEST_F(ApmTest, VoiceDetection) { TEST_F(ApmTest, VoiceDetection) {
@ -1028,12 +871,325 @@ TEST_F(ApmTest, VoiceDetection) {
// TODO(bjornv): Add tests for streamed voice; stream_has_voice() // TODO(bjornv): Add tests for streamed voice; stream_has_voice()
} }
TEST_F(ApmTest, SplittingFilter) {
// Verify that the splitting filter is inactive (i.e. the audio passes through
// undistorted) when:
// 1. No components are enabled...
SetFrameTo(frame_, 1000);
AudioFrame frame_copy = *frame_;
EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
EXPECT_TRUE(FrameDataAreEqual(*frame_, frame_copy));
// 2. Only the level estimator is enabled...
SetFrameTo(frame_, 1000);
frame_copy = *frame_;
EXPECT_EQ(apm_->kNoError, apm_->level_estimator()->Enable(true));
EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
EXPECT_TRUE(FrameDataAreEqual(*frame_, frame_copy));
EXPECT_EQ(apm_->kNoError, apm_->level_estimator()->Enable(false));
// 3. Only VAD is enabled...
SetFrameTo(frame_, 1000);
frame_copy = *frame_;
EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(true));
EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
EXPECT_TRUE(FrameDataAreEqual(*frame_, frame_copy));
EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(false));
// 4. Both VAD and the level estimator are enabled...
SetFrameTo(frame_, 1000);
frame_copy = *frame_;
EXPECT_EQ(apm_->kNoError, apm_->level_estimator()->Enable(true));
EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(true));
EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
EXPECT_TRUE(FrameDataAreEqual(*frame_, frame_copy));
EXPECT_EQ(apm_->kNoError, apm_->level_estimator()->Enable(false));
EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(false));
// 5. Not using super-wb.
EXPECT_EQ(apm_->kNoError, apm_->set_sample_rate_hz(16000));
frame_->_payloadDataLengthInSamples = 160;
frame_->_audioChannel = 2;
frame_->_frequencyInHz = 16000;
// Enable AEC, which would require the filter in super-wb. We rely on the
// first few frames of data being unaffected by the AEC.
// TODO(andrew): This test, and the one below, rely rather tenuously on the
// behavior of the AEC. Think of something more robust.
EXPECT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(true));
SetFrameTo(frame_, 1000);
frame_copy = *frame_;
EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(0));
EXPECT_EQ(apm_->kNoError,
apm_->echo_cancellation()->set_stream_drift_samples(0));
EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(0));
EXPECT_EQ(apm_->kNoError,
apm_->echo_cancellation()->set_stream_drift_samples(0));
EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
EXPECT_TRUE(FrameDataAreEqual(*frame_, frame_copy));
// Check that the test is valid: with AEC enabled in super-wb, the splitting
// filter should distort the audio, even though the AEC itself does not yet
// affect these first frames.
EXPECT_EQ(apm_->kNoError, apm_->set_sample_rate_hz(32000));
frame_->_payloadDataLengthInSamples = 320;
frame_->_audioChannel = 2;
frame_->_frequencyInHz = 32000;
SetFrameTo(frame_, 1000);
frame_copy = *frame_;
EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(0));
EXPECT_EQ(apm_->kNoError,
apm_->echo_cancellation()->set_stream_drift_samples(0));
EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
EXPECT_FALSE(FrameDataAreEqual(*frame_, frame_copy));
}
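// SetFrameTo() and FrameDataAreEqual() are test helpers defined earlier in
// this file (outside this excerpt). Minimal sketches of what they are assumed
// to do, using only AudioFrame members that appear in these tests:
static void SetFrameToSketch(AudioFrame* frame, int16_t value) {
  // Fill every (interleaved) sample with a constant.
  const int samples =
      frame->_payloadDataLengthInSamples * frame->_audioChannel;
  for (int i = 0; i < samples; i++) {
    frame->_payloadData[i] = value;
  }
}

static bool FrameDataAreEqualSketch(const AudioFrame& a, const AudioFrame& b) {
  if (a._payloadDataLengthInSamples != b._payloadDataLengthInSamples ||
      a._audioChannel != b._audioChannel) {
    return false;
  }
  const int samples = a._payloadDataLengthInSamples * a._audioChannel;
  for (int i = 0; i < samples; i++) {
    if (a._payloadData[i] != b._payloadData[i]) {
      return false;
    }
  }
  return true;
}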
TEST_F(ApmTest, Process) {
GOOGLE_PROTOBUF_VERIFY_VERSION;
webrtc::audioproc::OutputData output_data;
if (!write_output_data) {
ReadMessageLiteFromFile(output_filename, &output_data);
} else {
// We don't have a file; add the required tests to the protobuf.
// TODO(ajm): vary the output channels as well?
const int channels[] = {1, 2};
const size_t channels_size = sizeof(channels) / sizeof(*channels);
#if defined(WEBRTC_APM_UNIT_TEST_FIXED_PROFILE)
// AECM doesn't support super-wb.
const int sample_rates[] = {8000, 16000};
#elif defined(WEBRTC_APM_UNIT_TEST_FLOAT_PROFILE)
const int sample_rates[] = {8000, 16000, 32000};
#endif
const size_t sample_rates_size = sizeof(sample_rates) / sizeof(*sample_rates);
for (size_t i = 0; i < channels_size; i++) {
for (size_t j = 0; j < channels_size; j++) {
for (size_t k = 0; k < sample_rates_size; k++) {
webrtc::audioproc::Test* test = output_data.add_test();
test->set_num_reverse_channels(channels[i]);
test->set_num_input_channels(channels[j]);
test->set_num_output_channels(channels[j]);
test->set_sample_rate(sample_rates[k]);
}
}
}
}
#if defined(WEBRTC_APM_UNIT_TEST_FIXED_PROFILE)
EXPECT_EQ(apm_->kNoError, apm_->set_sample_rate_hz(16000));
EXPECT_EQ(apm_->kNoError, apm_->echo_control_mobile()->Enable(true));
EXPECT_EQ(apm_->kNoError,
apm_->gain_control()->set_mode(GainControl::kAdaptiveDigital));
EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true));
#elif defined(WEBRTC_APM_UNIT_TEST_FLOAT_PROFILE)
EXPECT_EQ(apm_->kNoError,
apm_->echo_cancellation()->enable_drift_compensation(true));
EXPECT_EQ(apm_->kNoError,
apm_->echo_cancellation()->enable_metrics(true));
EXPECT_EQ(apm_->kNoError,
apm_->echo_cancellation()->enable_delay_logging(true));
EXPECT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(true));
EXPECT_EQ(apm_->kNoError,
apm_->gain_control()->set_mode(GainControl::kAdaptiveAnalog));
EXPECT_EQ(apm_->kNoError,
apm_->gain_control()->set_analog_level_limits(0, 255));
EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true));
#endif
EXPECT_EQ(apm_->kNoError,
apm_->high_pass_filter()->Enable(true));
EXPECT_EQ(apm_->kNoError,
apm_->level_estimator()->Enable(true));
EXPECT_EQ(apm_->kNoError,
apm_->noise_suppression()->Enable(true));
EXPECT_EQ(apm_->kNoError,
apm_->voice_detection()->Enable(true));
for (int i = 0; i < output_data.test_size(); i++) {
printf("Running test %d of %d...\n", i + 1, output_data.test_size());
webrtc::audioproc::Test* test = output_data.mutable_test(i);
const int samples_per_channel = test->sample_rate() / 100;
revframe_->_payloadDataLengthInSamples = samples_per_channel;
revframe_->_audioChannel = test->num_reverse_channels();
revframe_->_frequencyInHz = test->sample_rate();
frame_->_payloadDataLengthInSamples = samples_per_channel;
frame_->_audioChannel = test->num_input_channels();
frame_->_frequencyInHz = test->sample_rate();
EXPECT_EQ(apm_->kNoError, apm_->Initialize());
ASSERT_EQ(apm_->kNoError, apm_->set_sample_rate_hz(test->sample_rate()));
ASSERT_EQ(apm_->kNoError, apm_->set_num_channels(frame_->_audioChannel,
frame_->_audioChannel));
ASSERT_EQ(apm_->kNoError,
apm_->set_num_reverse_channels(revframe_->_audioChannel));
int frame_count = 0;
int has_echo_count = 0;
int has_voice_count = 0;
int is_saturated_count = 0;
int analog_level = 127;
int analog_level_average = 0;
int max_output_average = 0;
while (1) {
// Read far-end frame
const size_t frame_size = samples_per_channel * 2;
size_t read_count = fread(revframe_->_payloadData,
sizeof(int16_t),
frame_size,
far_file_);
if (read_count != frame_size) {
// Check that the file really ended.
ASSERT_NE(0, feof(far_file_));
break; // This is expected.
}
if (revframe_->_audioChannel == 1) {
MixStereoToMono(revframe_->_payloadData, revframe_->_payloadData,
samples_per_channel);
}
EXPECT_EQ(apm_->kNoError, apm_->AnalyzeReverseStream(revframe_));
EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(0));
EXPECT_EQ(apm_->kNoError,
apm_->echo_cancellation()->set_stream_drift_samples(0));
EXPECT_EQ(apm_->kNoError,
apm_->gain_control()->set_stream_analog_level(analog_level));
// Read near-end frame
read_count = fread(frame_->_payloadData,
sizeof(int16_t),
frame_size,
near_file_);
if (read_count != frame_size) {
// Check that the file really ended.
ASSERT_NE(0, feof(near_file_));
break; // This is expected.
}
if (frame_->_audioChannel == 1) {
MixStereoToMono(frame_->_payloadData, frame_->_payloadData,
samples_per_channel);
}
frame_->_vadActivity = AudioFrame::kVadUnknown;
EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
max_output_average += MaxAudioFrame(*frame_);
if (apm_->echo_cancellation()->stream_has_echo()) {
has_echo_count++;
}
analog_level = apm_->gain_control()->stream_analog_level();
analog_level_average += analog_level;
if (apm_->gain_control()->stream_is_saturated()) {
is_saturated_count++;
}
if (apm_->voice_detection()->stream_has_voice()) {
has_voice_count++;
EXPECT_EQ(AudioFrame::kVadActive, frame_->_vadActivity);
} else {
EXPECT_EQ(AudioFrame::kVadPassive, frame_->_vadActivity);
}
frame_count++;
}
max_output_average /= frame_count;
analog_level_average /= frame_count;
#if defined(WEBRTC_APM_UNIT_TEST_FLOAT_PROFILE)
EchoCancellation::Metrics echo_metrics;
EXPECT_EQ(apm_->kNoError,
apm_->echo_cancellation()->GetMetrics(&echo_metrics));
int median = 0;
int std = 0;
EXPECT_EQ(apm_->kNoError,
apm_->echo_cancellation()->GetDelayMetrics(&median, &std));
int rms_level = apm_->level_estimator()->RMS();
EXPECT_LE(0, rms_level);
EXPECT_GE(127, rms_level);
#endif
if (!write_output_data) {
EXPECT_EQ(test->has_echo_count(), has_echo_count);
EXPECT_EQ(test->has_voice_count(), has_voice_count);
EXPECT_EQ(test->is_saturated_count(), is_saturated_count);
EXPECT_EQ(test->analog_level_average(), analog_level_average);
EXPECT_EQ(test->max_output_average(), max_output_average);
#if defined(WEBRTC_APM_UNIT_TEST_FLOAT_PROFILE)
webrtc::audioproc::Test::EchoMetrics reference =
test->echo_metrics();
TestStats(echo_metrics.residual_echo_return_loss,
reference.residual_echo_return_loss());
TestStats(echo_metrics.echo_return_loss,
reference.echo_return_loss());
TestStats(echo_metrics.echo_return_loss_enhancement,
reference.echo_return_loss_enhancement());
TestStats(echo_metrics.a_nlp,
reference.a_nlp());
webrtc::audioproc::Test::DelayMetrics reference_delay =
test->delay_metrics();
EXPECT_EQ(median, reference_delay.median());
EXPECT_EQ(std, reference_delay.std());
EXPECT_EQ(test->rms_level(), rms_level);
#endif
} else {
test->set_has_echo_count(has_echo_count);
test->set_has_voice_count(has_voice_count);
test->set_is_saturated_count(is_saturated_count);
test->set_analog_level_average(analog_level_average);
test->set_max_output_average(max_output_average);
#if defined(WEBRTC_APM_UNIT_TEST_FLOAT_PROFILE)
webrtc::audioproc::Test::EchoMetrics* message =
test->mutable_echo_metrics();
WriteStatsMessage(echo_metrics.residual_echo_return_loss,
message->mutable_residual_echo_return_loss());
WriteStatsMessage(echo_metrics.echo_return_loss,
message->mutable_echo_return_loss());
WriteStatsMessage(echo_metrics.echo_return_loss_enhancement,
message->mutable_echo_return_loss_enhancement());
WriteStatsMessage(echo_metrics.a_nlp,
message->mutable_a_nlp());
webrtc::audioproc::Test::DelayMetrics* message_delay =
test->mutable_delay_metrics();
message_delay->set_median(median);
message_delay->set_std(std);
test->set_rms_level(rms_level);
#endif
}
rewind(far_file_);
rewind(near_file_);
}
if (write_output_data) {
WriteMessageLiteToFile(output_filename, output_data);
}
}
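// MixStereoToMono() is another test helper defined earlier in this file
// (outside this excerpt). A minimal sketch of the behavior assumed by the
// calls above: average each interleaved stereo sample pair into one mono
// sample, writing in place so source and destination may be the same buffer.
static void MixStereoToMonoSketch(const int16_t* stereo, int16_t* mono,
                                  int samples_per_channel) {
  for (int i = 0; i < samples_per_channel; i++) {
    // Read both samples before writing, since mono may alias stereo.
    int32_t sum = static_cast<int32_t>(stereo[2 * i]) +
                  static_cast<int32_t>(stereo[2 * i + 1]);
    mono[i] = static_cast<int16_t>(sum / 2);  // Fits in int16; no clamping.
  }
}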
} // namespace } // namespace
int main(int argc, char** argv) { int main(int argc, char** argv) {
::testing::InitGoogleTest(&argc, argv); ::testing::InitGoogleTest(&argc, argv);
ApmEnvironment* env = new ApmEnvironment; // GTest takes ownership.
::testing::AddGlobalTestEnvironment(env);
for (int i = 1; i < argc; i++) { for (int i = 1; i < argc; i++) {
if (strcmp(argv[i], "--write_output_data") == 0) { if (strcmp(argv[i], "--write_output_data") == 0) {
View File
@ -42,6 +42,8 @@ message Test {
} }
optional DelayMetrics delay_metrics = 12; optional DelayMetrics delay_metrics = 12;
optional int32 rms_level = 13;
} }
message OutputData { message OutputData {
View File
@ -50,10 +50,11 @@ Channel::SendData(FrameType frameType,
if (_includeAudioLevelIndication) if (_includeAudioLevelIndication)
{ {
assert(_rtpAudioProc.get() != NULL);
// Store current audio level in the RTP/RTCP module. // Store current audio level in the RTP/RTCP module.
// The level will be used in combination with voice-activity state // The level will be used in combination with voice-activity state
// (frameType) to add an RTP header extension // (frameType) to add an RTP header extension
_rtpRtcpModule.SetAudioLevel(_audioLevel_dBov); _rtpRtcpModule.SetAudioLevel(_rtpAudioProc->level_estimator()->RMS());
} }
// Push data from ACM to RTP/RTCP-module to deliver audio frame for // Push data from ACM to RTP/RTCP-module to deliver audio frame for
@ -1085,7 +1086,6 @@ Channel::Channel(const WebRtc_Word32 channelId,
_rtpDumpOut(*RtpDump::CreateRtpDump()), _rtpDumpOut(*RtpDump::CreateRtpDump()),
_outputAudioLevel(), _outputAudioLevel(),
_externalTransport(false), _externalTransport(false),
_audioLevel_dBov(100),
_inputFilePlayerPtr(NULL), _inputFilePlayerPtr(NULL),
_outputFilePlayerPtr(NULL), _outputFilePlayerPtr(NULL),
_outputFileRecorderPtr(NULL), _outputFileRecorderPtr(NULL),
@ -1119,6 +1119,7 @@ Channel::Channel(const WebRtc_Word32 channelId,
_callbackCritSectPtr(NULL), _callbackCritSectPtr(NULL),
_transportPtr(NULL), _transportPtr(NULL),
_encryptionPtr(NULL), _encryptionPtr(NULL),
_rtpAudioProc(NULL),
_rxAudioProcessingModulePtr(NULL), _rxAudioProcessingModulePtr(NULL),
#ifdef WEBRTC_DTMF_DETECTION #ifdef WEBRTC_DTMF_DETECTION
_telephoneEventDetectionPtr(NULL), _telephoneEventDetectionPtr(NULL),
@ -1546,16 +1547,6 @@ Channel::Init()
return -1; return -1;
} }
if (_rxAudioProcessingModulePtr->echo_cancellation()->
set_device_sample_rate_hz(
kVoiceEngineAudioProcessingDeviceSampleRateHz))
{
_engineStatisticsPtr->SetLastError(
VE_APM_ERROR, kTraceWarning,
"Channel::Init() failed to set the device sample rate to 48K"
" for far-end AP module");
}
if (_rxAudioProcessingModulePtr->set_sample_rate_hz(8000)) if (_rxAudioProcessingModulePtr->set_sample_rate_hz(8000))
{ {
_engineStatisticsPtr->SetLastError( _engineStatisticsPtr->SetLastError(
@ -1568,16 +1559,7 @@ Channel::Init()
{ {
_engineStatisticsPtr->SetLastError( _engineStatisticsPtr->SetLastError(
VE_SOUNDCARD_ERROR, kTraceWarning, VE_SOUNDCARD_ERROR, kTraceWarning,
"Init() failed to set channels for the primary audio" "Init() failed to set channels for the primary audio stream");
" stream");
}
if (_rxAudioProcessingModulePtr->set_num_reverse_channels(1) != 0)
{
_engineStatisticsPtr->SetLastError(
VE_SOUNDCARD_ERROR, kTraceWarning,
"Init() failed to set channels for the primary audio"
" stream");
} }
if (_rxAudioProcessingModulePtr->high_pass_filter()->Enable( if (_rxAudioProcessingModulePtr->high_pass_filter()->Enable(
@ -5164,6 +5146,25 @@ Channel::GetRemoteCSRCs(unsigned int arrCSRC[15])
int int
Channel::SetRTPAudioLevelIndicationStatus(bool enable, unsigned char ID) Channel::SetRTPAudioLevelIndicationStatus(bool enable, unsigned char ID)
{ {
if (_rtpAudioProc.get() == NULL)
{
_rtpAudioProc.reset(AudioProcessing::Create(VoEModuleId(_instanceId,
_channelId)));
if (_rtpAudioProc.get() == NULL)
{
_engineStatisticsPtr->SetLastError(VE_NO_MEMORY, kTraceCritical,
"Failed to create AudioProcessing");
return -1;
}
}
if (_rtpAudioProc->level_estimator()->Enable(enable) !=
AudioProcessing::kNoError)
{
_engineStatisticsPtr->SetLastError(VE_APM_ERROR, kTraceWarning,
"Failed to enable AudioProcessing::level_estimator()");
}
_includeAudioLevelIndication = enable; _includeAudioLevelIndication = enable;
return _rtpRtcpModule.SetRTPAudioLevelIndicationStatus(enable, ID); return _rtpRtcpModule.SetRTPAudioLevelIndicationStatus(enable, ID);
} }
@ -5837,14 +5838,12 @@ Channel::InsertExtraRTPPacket(unsigned char payloadType,
} }
WebRtc_UWord32 WebRtc_UWord32
Channel::Demultiplex(const AudioFrame& audioFrame, Channel::Demultiplex(const AudioFrame& audioFrame)
const WebRtc_UWord8 audioLevel_dBov)
{ {
WEBRTC_TRACE(kTraceStream, kTraceVoice, VoEId(_instanceId,_channelId), WEBRTC_TRACE(kTraceStream, kTraceVoice, VoEId(_instanceId,_channelId),
"Channel::Demultiplex(audioLevel_dBov=%u)", audioLevel_dBov); "Channel::Demultiplex()");
_audioFrame = audioFrame; _audioFrame = audioFrame;
_audioFrame._id = _channelId; _audioFrame._id = _channelId;
_audioLevel_dBov = audioLevel_dBov;
return 0; return 0;
} }
@ -5889,6 +5888,40 @@ Channel::PrepareEncodeAndSend(int mixingFrequency)
InsertInbandDtmfTone(); InsertInbandDtmfTone();
if (_includeAudioLevelIndication)
{
assert(_rtpAudioProc.get() != NULL);
// Check if settings need to be updated.
if (_rtpAudioProc->sample_rate_hz() != _audioFrame._frequencyInHz)
{
if (_rtpAudioProc->set_sample_rate_hz(_audioFrame._frequencyInHz) !=
AudioProcessing::kNoError)
{
WEBRTC_TRACE(kTraceWarning, kTraceVoice,
VoEId(_instanceId, _channelId),
"Error setting AudioProcessing sample rate");
return -1;
}
}
if (_rtpAudioProc->num_input_channels() != _audioFrame._audioChannel)
{
if (_rtpAudioProc->set_num_channels(_audioFrame._audioChannel,
_audioFrame._audioChannel)
!= AudioProcessing::kNoError)
{
WEBRTC_TRACE(kTraceWarning, kTraceVoice,
VoEId(_instanceId, _channelId),
"Error setting AudioProcessing channels");
return -1;
}
}
// Performs level analysis only; does not affect the signal.
_rtpAudioProc->ProcessStream(&_audioFrame);
}
return 0; return 0;
} }
@ -6632,10 +6665,11 @@ Channel::ApmProcessRx(AudioFrame& audioFrame)
"Channel::ApmProcessRx()"); "Channel::ApmProcessRx()");
// Reset the APM frequency if the frequency has changed // Reset the APM frequency if the frequency has changed
if(_rxAudioProcessingModulePtr->sample_rate_hz()!=audioFrame._frequencyInHz) if (_rxAudioProcessingModulePtr->sample_rate_hz() !=
audioFrame._frequencyInHz)
{ {
if (_rxAudioProcessingModulePtr->set_sample_rate_hz( if (_rxAudioProcessingModulePtr->set_sample_rate_hz(
audioFrame._frequencyInHz)) audioFrame._frequencyInHz) != 0)
{ {
WEBRTC_TRACE(kTraceWarning, kTraceVoice, VoEId(_instanceId,-1), WEBRTC_TRACE(kTraceWarning, kTraceVoice, VoEId(_instanceId,-1),
"AudioProcessingModule::set_sample_rate_hz(" "AudioProcessingModule::set_sample_rate_hz("
@ -6644,7 +6678,7 @@ Channel::ApmProcessRx(AudioFrame& audioFrame)
} }
} }
if (_rxAudioProcessingModulePtr->ProcessStream(&audioFrame) == -1) if (_rxAudioProcessingModulePtr->ProcessStream(&audioFrame) != 0)
{ {
WEBRTC_TRACE(kTraceWarning, kTraceVoice, VoEId(_instanceId,-1), WEBRTC_TRACE(kTraceWarning, kTraceVoice, VoEId(_instanceId,-1),
"AudioProcessingModule::ProcessStream() => error"); "AudioProcessingModule::ProcessStream() => error");
View File
@ -11,28 +11,28 @@
#ifndef WEBRTC_VOICE_ENGINE_CHANNEL_H #ifndef WEBRTC_VOICE_ENGINE_CHANNEL_H
#define WEBRTC_VOICE_ENGINE_CHANNEL_H #define WEBRTC_VOICE_ENGINE_CHANNEL_H
#include "voe_network.h"
#include "audio_coding_module.h" #include "audio_coding_module.h"
#include "audio_conference_mixer_defines.h"
#include "common_types.h" #include "common_types.h"
#include "shared_data.h" #include "dtmf_inband.h"
#include "dtmf_inband_queue.h"
#include "file_player.h"
#include "file_recorder.h"
#include "level_indicator.h"
#include "resampler.h"
#include "rtp_rtcp.h" #include "rtp_rtcp.h"
#include "scoped_ptr.h"
#include "shared_data.h"
#include "voe_audio_processing.h" #include "voe_audio_processing.h"
#include "voe_network.h"
#include "voice_engine_defines.h" #include "voice_engine_defines.h"
#ifndef WEBRTC_EXTERNAL_TRANSPORT #ifndef WEBRTC_EXTERNAL_TRANSPORT
#include "udp_transport.h" #include "udp_transport.h"
#endif #endif
#include "audio_conference_mixer_defines.h"
#include "file_player.h"
#include "file_recorder.h"
#ifdef WEBRTC_SRTP #ifdef WEBRTC_SRTP
#include "SrtpModule.h" #include "SrtpModule.h"
#endif #endif
#include "dtmf_inband.h"
#include "dtmf_inband_queue.h"
#include "level_indicator.h"
#include "resampler.h"
#ifdef WEBRTC_DTMF_DETECTION #ifdef WEBRTC_DTMF_DETECTION
#include "voe_dtmf.h" // TelephoneEventDetectionMethods, TelephoneEventObserver #include "voe_dtmf.h" // TelephoneEventDetectionMethods, TelephoneEventObserver
#endif #endif
@ -513,8 +513,7 @@ public:
return _socketTransportModule.ReceiveSocketsInitialized(); return _socketTransportModule.ReceiveSocketsInitialized();
}; };
#endif #endif
WebRtc_UWord32 Demultiplex(const AudioFrame& audioFrame, WebRtc_UWord32 Demultiplex(const AudioFrame& audioFrame);
const WebRtc_UWord8 audioLevel_dBov);
WebRtc_UWord32 PrepareEncodeAndSend(int mixingFrequency); WebRtc_UWord32 PrepareEncodeAndSend(int mixingFrequency);
WebRtc_UWord32 EncodeAndSend(); WebRtc_UWord32 EncodeAndSend();
@ -590,6 +589,7 @@ private:
CriticalSectionWrapper* _callbackCritSectPtr; // owned by base CriticalSectionWrapper* _callbackCritSectPtr; // owned by base
Transport* _transportPtr; // WebRtc socket or external transport Transport* _transportPtr; // WebRtc socket or external transport
Encryption* _encryptionPtr; // WebRtc SRTP or external encryption Encryption* _encryptionPtr; // WebRtc SRTP or external encryption
scoped_ptr<AudioProcessing> _rtpAudioProc;
AudioProcessing* _rxAudioProcessingModulePtr; // far end AudioProcessing AudioProcessing* _rxAudioProcessingModulePtr; // far end AudioProcessing
#ifdef WEBRTC_DTMF_DETECTION #ifdef WEBRTC_DTMF_DETECTION
VoETelephoneEventObserver* _telephoneEventDetectionPtr; VoETelephoneEventObserver* _telephoneEventDetectionPtr;
View File
@ -195,9 +195,7 @@ TransmitMixer::TransmitMixer(const WebRtc_UWord32 instanceId) :
_externalMediaCallbackPtr(NULL), _externalMediaCallbackPtr(NULL),
_mute(false), _mute(false),
_remainingMuteMicTimeMs(0), _remainingMuteMicTimeMs(0),
_mixingFrequency(0), _mixingFrequency(0)
_includeAudioLevelIndication(false),
_audioLevel_dBov(100)
{ {
WEBRTC_TRACE(kTraceMemory, kTraceVoice, VoEId(_instanceId, -1), WEBRTC_TRACE(kTraceMemory, kTraceVoice, VoEId(_instanceId, -1),
"TransmitMixer::TransmitMixer() - ctor"); "TransmitMixer::TransmitMixer() - ctor");
@ -371,7 +369,6 @@ TransmitMixer::PrepareDemux(const WebRtc_Word8* audioSamples,
if (_mute) if (_mute)
{ {
AudioFrameOperations::Mute(_audioFrame); AudioFrameOperations::Mute(_audioFrame);
_audioLevel_dBov = 100;
} }
// --- Measure audio level of speech after APM processing // --- Measure audio level of speech after APM processing
@ -442,7 +439,7 @@ TransmitMixer::DemuxAndMix()
// load temporary audioframe with current (mixed) microphone signal // load temporary audioframe with current (mixed) microphone signal
AudioFrame tmpAudioFrame = _audioFrame; AudioFrame tmpAudioFrame = _audioFrame;
channelPtr->Demultiplex(tmpAudioFrame, _audioLevel_dBov); channelPtr->Demultiplex(tmpAudioFrame);
channelPtr->PrepareEncodeAndSend(_mixingFrequency); channelPtr->PrepareEncodeAndSend(_mixingFrequency);
} }
channelPtr = sc.GetNextChannel(iterator); channelPtr = sc.GetNextChannel(iterator);
@ -1323,30 +1320,6 @@ WebRtc_Word32 TransmitMixer::APMProcessStream(
// Store new capture level (only updated when analog AGC is enabled) // Store new capture level (only updated when analog AGC is enabled)
_captureLevel = captureLevel; _captureLevel = captureLevel;
// Store current audio level (in dBov) if audio-level-indication
// functionality has been enabled. This value will be included in an
// extended RTP header by the RTP module.
if (_includeAudioLevelIndication)
{
if (_audioProcessingModulePtr->level_estimator()->is_enabled())
{
LevelEstimator::Metrics metrics;
LevelEstimator::Metrics reverseMetrics;
_audioProcessingModulePtr->level_estimator()->GetMetrics(
&metrics,
&reverseMetrics);
const WebRtc_Word16 absAudioLevel_dBov =
WEBRTC_ABS(metrics.speech.instant);
_audioLevel_dBov = static_cast<WebRtc_UWord8> (absAudioLevel_dBov);
} else
{
WEBRTC_TRACE(kTraceWarning, kTraceVoice, VoEId(_instanceId, -1),
"TransmitMixer::APMProcessStream() failed to "
"retrieve level metrics");
_audioLevel_dBov = 100;
}
}
// Log notifications // Log notifications
if (_audioProcessingModulePtr->gain_control()->stream_is_saturated()) if (_audioProcessingModulePtr->gain_control()->stream_is_saturated())
{ {
View File
@ -69,10 +69,6 @@ public:
WebRtc_Word32 StopSend(); WebRtc_Word32 StopSend();
void SetRTPAudioLevelIndicationStatus(bool enable)
{ _includeAudioLevelIndication = enable; }
// VoEDtmf // VoEDtmf
void UpdateMuteMicrophoneTime(const WebRtc_UWord32 lengthMs); void UpdateMuteMicrophoneTime(const WebRtc_UWord32 lengthMs);
@ -217,7 +213,6 @@ private:
WebRtc_Word32 _remainingMuteMicTimeMs; WebRtc_Word32 _remainingMuteMicTimeMs;
int _mixingFrequency; int _mixingFrequency;
bool _includeAudioLevelIndication; bool _includeAudioLevelIndication;
WebRtc_UWord8 _audioLevel_dBov;
}; };
#endif // WEBRTC_VOICE_ENGINE_TRANSMIT_MIXER_H #endif // WEBRTC_VOICE_ENGINE_TRANSMIT_MIXER_H
View File
@ -262,22 +262,6 @@ int VoERTP_RTCPImpl::SetRTPAudioLevelIndicationStatus(int channel,
return -1; return -1;
} }
// Set AudioProcessingModule level-metric mode based on user input.
// Note that the Level Estimator component is currently not supported
if (_audioProcessingModulePtr->level_estimator()->Enable(enable) != 0)
{
_engineStatistics.SetLastError(
VE_APM_ERROR, kTraceError,
"SetRTPAudioLevelIndicationStatus() failed to set level-metric"
"mode");
return -1;
}
// Ensure that the transmit mixer reads the audio-level metric for each
// 10ms packet and copies the same value to all active channels.
// The metric is derived within the AudioProcessingModule.
_transmitMixerPtr->SetRTPAudioLevelIndicationStatus(enable);
// Set state and ID for the specified channel. // Set state and ID for the specified channel.
voe::ScopedChannel sc(_channelManager, channel); voe::ScopedChannel sc(_channelManager, channel);
voe::Channel* channelPtr = sc.ChannelPtr(); voe::Channel* channelPtr = sc.ChannelPtr();
View File
@ -6905,6 +6905,67 @@ int VoEExtendedTest::TestNetwork()
// VoEExtendedTest::TestRTP_RTCP // VoEExtendedTest::TestRTP_RTCP
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
// Used to validate packets during the RTP audio level indication test.
class RTPAudioTransport : public Transport {
public:
RTPAudioTransport()
: mute_(false) {}
virtual ~RTPAudioTransport() {}
void set_mute(bool mute) { mute_ = mute; }
bool mute() const { return mute_; }
// TODO(andrew): use proper error checks here rather than asserts.
virtual int SendPacket(int channel, const void* data, int length) {
const uint8_t* packet = static_cast<const uint8_t*>(data);
// Extension bit.
assert(packet[0] & 0x10);
int index = 12; // Assume standard RTP header.
// Header extension ID
assert(packet[index++] == 0xBE);
assert(packet[index++] == 0xDE);
// Header extension length
assert(packet[index++] == 0x00);
assert(packet[index++] == 0x01);
// User-defined ID.
assert(((packet[index] & 0xf0) >> 4) == 1);
// Length
assert((packet[index++] & 0x0f) == 0);
int vad = packet[index] >> 7;
int level = packet[index] & 0x7f;
if (channel == 0) {
printf("%d -%d\n", vad, level);
} else if (channel == 1) {
printf(" %d -%d\n", vad, level);
} else {
assert(false);
}
if (mute_) {
assert(vad == 0);
assert(level == 127);
} else {
assert(vad == 0 || vad == 1);
assert(level >= 0 && level <= 127);
}
return 0;
}
virtual int SendRTCPPacket(int /*channel*/, const void* /*data*/,
int /*length*/) {
return 0;
}
private:
bool mute_;
};
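// For reference, the bytes SendPacket() above checks correspond to a
// one-byte-header RTP extension carrying the audio level: 0xBE 0xDE (profile
// marker), a 16-bit length of 0x0001 (one 32-bit word of data), an ID/length
// byte of 0x10 (ID = 1, data length - 1 = 0), then the level byte. A sketch
// of how such a level byte would be packed (an illustration only; the sending
// side lives in the RTP module):
static uint8_t PackAudioLevelByte(bool vad, uint8_t level_dbov) {
  // Top bit carries the VAD flag; the low seven bits carry the level as
  // dB below the overload point (0..127).
  return static_cast<uint8_t>((vad ? 0x80 : 0x00) | (level_dbov & 0x7f));
}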
int VoEExtendedTest::TestRTP_RTCP() int VoEExtendedTest::TestRTP_RTCP()
{ {
PrepareTest("RTP_RTCP"); PrepareTest("RTP_RTCP");
@ -6912,6 +6973,9 @@ int VoEExtendedTest::TestRTP_RTCP()
VoEBase* base = _mgr.BasePtr(); VoEBase* base = _mgr.BasePtr();
VoEFile* file = _mgr.FilePtr(); VoEFile* file = _mgr.FilePtr();
VoERTP_RTCP* rtp_rtcp = _mgr.RTP_RTCPPtr(); VoERTP_RTCP* rtp_rtcp = _mgr.RTP_RTCPPtr();
VoENetwork* network = _mgr.NetworkPtr();
VoEVolumeControl* volume = _mgr.VolumeControlPtr();
VoECodec* codec = _mgr.CodecPtr();
XRTPObserver rtpObserver; XRTPObserver rtpObserver;
@ -6961,8 +7025,6 @@ int VoEExtendedTest::TestRTP_RTCP()
TEST_ERROR(VE_INVALID_ARGUMENT); TEST_ERROR(VE_INVALID_ARGUMENT);
TEST_MUSTPASS(-1 != rtp_rtcp->SetRTPAudioLevelIndicationStatus(0, false, 15)); TEST_MUSTPASS(-1 != rtp_rtcp->SetRTPAudioLevelIndicationStatus(0, false, 15));
MARK(); MARK();
// TODO(bjornv): Activate tests below when APM supports level estimation.
/*
TEST_MUSTPASS(-1 != rtp_rtcp->SetRTPAudioLevelIndicationStatus(1, true, 5)); TEST_MUSTPASS(-1 != rtp_rtcp->SetRTPAudioLevelIndicationStatus(1, true, 5));
MARK(); MARK();
TEST_ERROR(VE_CHANNEL_NOT_VALID); TEST_ERROR(VE_CHANNEL_NOT_VALID);
@ -6986,10 +7048,70 @@ int VoEExtendedTest::TestRTP_RTCP()
TEST_MUSTPASS(audioLevelEnabled != false); TEST_MUSTPASS(audioLevelEnabled != false);
TEST_MUSTPASS(ID != id); TEST_MUSTPASS(ID != id);
} }
TEST_MUSTPASS(base->StopPlayout(0));
TEST_MUSTPASS(base->StopSend(0));
TEST_MUSTPASS(base->StopPlayout(0));
TEST_MUSTPASS(base->DeleteChannel(0));
RTPAudioTransport rtpAudioTransport;
TEST_MUSTPASS(base->CreateChannel());
TEST_MUSTPASS(network->RegisterExternalTransport(0, rtpAudioTransport));
TEST_MUSTPASS(rtp_rtcp->SetRTPAudioLevelIndicationStatus(0, true));
TEST_MUSTPASS(codec->SetVADStatus(0, true));
printf("\n\nReceving muted packets (expect VAD = 0, Level = -127)...\n");
printf("VAD Level [dbFS]\n");
SLEEP(2000);
rtpAudioTransport.set_mute(true);
TEST_MUSTPASS(volume->SetInputMute(0, true));
TEST_MUSTPASS(base->StartSend(0));
SLEEP(5000);
TEST_MUSTPASS(base->StopSend(0));
rtpAudioTransport.set_mute(false);
TEST_MUSTPASS(volume->SetInputMute(0, false));
printf("\nReceiving packets from mic (should respond to mic level)...\n");
printf("VAD Level [dbFS]\n");
SLEEP(2000);
TEST_MUSTPASS(base->StartSend(0));
SLEEP(5000);
TEST_MUSTPASS(base->StopSend(0));
printf("\nReceiving packets from file (expect mostly VAD = 1)...\n");
printf("VAD Level [dbFS]\n");
SLEEP(2000);
TEST_MUSTPASS(file->StartPlayingFileAsMicrophone(0, _mgr.AudioFilename(),
true, true));
TEST_MUSTPASS(base->StartSend(0));
SLEEP(5000);
TEST_MUSTPASS(base->StopSend(0));
printf("\nMuted and mic on independent channels...\n");
printf("Muted Mic\n");
SLEEP(2000);
ASSERT_TRUE(1 == base->CreateChannel());
TEST_MUSTPASS(network->RegisterExternalTransport(1, rtpAudioTransport));
TEST_MUSTPASS(rtp_rtcp->SetRTPAudioLevelIndicationStatus(1, true));
TEST_MUSTPASS(codec->SetVADStatus(1, true));
TEST_MUSTPASS(volume->SetInputMute(0, true));
TEST_MUSTPASS(base->StartSend(0));
TEST_MUSTPASS(base->StartSend(1));
SLEEP(5000);
TEST_MUSTPASS(base->StopSend(0));
TEST_MUSTPASS(base->StopSend(1));
TEST_MUSTPASS(network->DeRegisterExternalTransport(0));
TEST_MUSTPASS(network->DeRegisterExternalTransport(1));
TEST_MUSTPASS(base->DeleteChannel(0));
TEST_MUSTPASS(base->DeleteChannel(1));
TEST_MUSTPASS(base->CreateChannel());
TEST_MUSTPASS(base->SetLocalReceiver(0, 12345));
TEST_MUSTPASS(base->SetSendDestination(0, 12345, "127.0.0.1"));
TEST_MUSTPASS(base->StartReceive(0));
TEST_MUSTPASS(base->StartSend(0));
TEST_MUSTPASS(base->StartPlayout(0));
// Disable the audio-level RTP header extension.
TEST_MUSTPASS(rtp_rtcp->SetRTPAudioLevelIndicationStatus(0, false));
*/
MARK(); MARK();
ANL(); ANL();
@ -7306,8 +7428,6 @@ int VoEExtendedTest::TestRTP_RTCP()
//The following test is related to defect 4985 and 4986 //The following test is related to defect 4985 and 4986
TEST_LOG("Turn FEC and VAD on and wait for 4 seconds and ensure that " TEST_LOG("Turn FEC and VAD on and wait for 4 seconds and ensure that "
"the jitter is still small..."); "the jitter is still small...");
VoECodec* codec = _mgr.CodecPtr();
TEST_MUSTPASS(NULL == codec);
CodecInst cinst; CodecInst cinst;
#if (!defined(MAC_IPHONE) && !defined(WEBRTC_ANDROID)) #if (!defined(MAC_IPHONE) && !defined(WEBRTC_ANDROID))
cinst.pltype = 104; cinst.pltype = 104;