Merge audio_processing changes.

R=aluebs@webrtc.org, bjornv@webrtc.org
BUG=

Review URL: https://webrtc-codereview.appspot.com/32769004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@7893 4adac7df-926f-26a2-2b94-8c16560cd09d
pbos@webrtc.org 2014-12-15 09:41:24 +00:00
parent fb108b5a28
commit 788acd17ad
108 changed files with 7822 additions and 51 deletions

.gitignore (vendored, 3 lines changed)
View File

@ -48,8 +48,7 @@
/links.db
/net
/out
/resources/*.*
/resources/*/*.*
/resources
/talk/examples/android/bin
/talk/examples/android/gen
/talk/examples/android/libs

View File

@ -0,0 +1 @@
10a52dc6d6f15242a1aa549205657f2834353673

View File

@ -0,0 +1 @@
61219028e15606a3adbbc61d393575ab36b4078b

View File

@ -0,0 +1 @@
ba0c6e93a5e6d351d95385699fb9a719b6a6d0cc

View File

@ -0,0 +1 @@
590c6fe033665d11fa70dbbbd3e7d8f0b8a616ce

View File

@ -0,0 +1 @@
3a5a28763e3ad5cd0f2833a90b685f4da97c2002

View File

@ -0,0 +1 @@
7cae05c6902812609fa23ac04037485503b0924d

View File

@ -0,0 +1 @@
b1ea860f0bfad3e86fedc43cd8752821e0d75a46

View File

@ -0,0 +1 @@
49402cfaa36be32320167a65c8e96f70548f5257

View File

@ -0,0 +1 @@
7c80af623675b2284f4081cfd2df9a0227bbc2a0

View File

@ -0,0 +1 @@
04155a7e186deb7524e3013476de3eaabd59a1f8

View File

@ -0,0 +1 @@
6c33b25be2eb9b441429aabf203d5b4a9e734c63

View File

@ -0,0 +1 @@
81cb7e547fad2894b5702fa571f9eb55ed6e1096

View File

@ -0,0 +1 @@
81cfcff6b0d70938fe74060ba0303504c31c6d7e

View File

@ -0,0 +1 @@
01278951e13675a3467782e1d2f18273c05eef50

View File

@ -0,0 +1 @@
5fcb4621ea0f50c3fc9a63e4720ff52631258437

View File

@ -0,0 +1 @@
35639dd1b73b678360897975a91a7c8af0be3644

View File

@ -0,0 +1 @@
c9d3d0b81262ffaba7d358ad534e6fcb27c00076

View File

@ -0,0 +1 @@
f46a3380c9285324e583965ef547fcaa1650f8b8

View File

@ -0,0 +1 @@
f625c14d134d69ad38b67295459406fc9947a705

View File

@ -0,0 +1 @@
c26083880cd227178917b4df230520dbfb9b9bb1

View File

@ -0,0 +1 @@
0eaaf21344b4b030d6c0fb6dcc419e7d3959a148

View File

@ -0,0 +1 @@
9781792dc39d7aada6418370246eef9f544ca47b

View File

@ -0,0 +1 @@
8b2bd11b591521178232aae598e6df0d001051c4

View File

@ -0,0 +1 @@
8a6c7ed696f9791f8cb5c5b061f07eb019affd49

View File

@ -0,0 +1 @@
7c01839f888fe6e10276e1819bd5207668345dcf

View File

@ -0,0 +1 @@
f7553df9abca91401715185d97d1d9c20a2ecb9b

View File

@ -0,0 +1 @@
0455d7042c64075e793285753a98f02268e6238b

View File

@ -0,0 +1 @@
941cc5d0bfccfd1d6bd68a1d882975202f22b6de

View File

@ -0,0 +1 @@
a16139b3750a13b62327e2a78ea008493a2b508b

View File

@ -0,0 +1 @@
6bf9272123656bc0561550a40734245709bbac10

View File

@ -0,0 +1 @@
6a2667c6c4b3794776af1dabacc3575791023168

View File

@ -0,0 +1 @@
620cf1f732c99003ff0e5d6ae3350c0a2ea2a9d7

View File

@ -22,25 +22,52 @@ declare_args() {
source_set("audio_processing") {
sources = [
"aec/include/echo_cancellation.h",
"aec/aec_core.c",
"aec/aec_core.h",
"aec/aec_core_internal.h",
"aec/aec_rdft.c",
"aec/aec_rdft.h",
"aec/aec_resampler.c",
"aec/aec_resampler.h",
"aec/echo_cancellation.c",
"aec/echo_cancellation_internal.h",
"aec/aec_core.h",
"aec/aec_core.c",
"aec/aec_core_internal.h",
"aec/aec_rdft.h",
"aec/aec_rdft.c",
"aec/aec_resampler.h",
"aec/aec_resampler.c",
"aecm/include/echo_control_mobile.h",
"aecm/echo_control_mobile.c",
"aec/include/echo_cancellation.h",
"aecm/aecm_core.c",
"aecm/aecm_core.h",
"agc/include/gain_control.h",
"aecm/echo_control_mobile.c",
"aecm/include/echo_control_mobile.h",
"agc/agc.cc",
"agc/agc.h",
"agc/agc_audio_proc.cc",
"agc/agc_audio_proc.h",
"agc/agc_audio_proc_internal.h",
"agc/agc_manager_direct.cc",
"agc/agc_manager_direct.h",
"agc/analog_agc.c",
"agc/analog_agc.h",
"agc/circular_buffer.cc",
"agc/circular_buffer.h",
"agc/common.h",
"agc/digital_agc.c",
"agc/digital_agc.h",
"agc/gain_map_internal.h",
"agc/gmm.cc",
"agc/gmm.h",
"agc/histogram.cc",
"agc/histogram.h",
"agc/include/gain_control.h",
"agc/noise_gmm_tables.h",
"agc/pitch_based_vad.cc",
"agc/pitch_based_vad.h",
"agc/pitch_internal.cc",
"agc/pitch_internal.h",
"agc/pole_zero_filter.cc",
"agc/pole_zero_filter.h",
"agc/standalone_vad.cc",
"agc/standalone_vad.h",
"agc/utility.cc",
"agc/utility.h",
"agc/voice_gmm_tables.h",
"audio_buffer.cc",
"audio_buffer.h",
"audio_processing_impl.cc",
@ -67,6 +94,19 @@ source_set("audio_processing") {
"rms_level.h",
"splitting_filter.cc",
"splitting_filter.h",
"transient/common.h",
"transient/daubechies_8_wavelet_coeffs.h",
"transient/dyadic_decimator.h",
"transient/moving_moments.cc",
"transient/moving_moments.h",
"transient/transient_detector.cc",
"transient/transient_detector.h",
"transient/transient_suppressor.cc",
"transient/transient_suppressor.h",
"transient/wpd_node.cc",
"transient/wpd_node.h",
"transient/wpd_tree.cc",
"transient/wpd_tree.h",
"typing_detection.cc",
"typing_detection.h",
"utility/delay_estimator.c",

View File

@ -0,0 +1,161 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/agc/agc.h"
#include <cmath>
#include <cstdlib>
#include <algorithm>
#include "webrtc/common_audio/resampler/include/resampler.h"
#include "webrtc/modules/audio_processing/agc/agc_audio_proc.h"
#include "webrtc/modules/audio_processing/agc/common.h"
#include "webrtc/modules/audio_processing/agc/histogram.h"
#include "webrtc/modules/audio_processing/agc/pitch_based_vad.h"
#include "webrtc/modules/audio_processing/agc/standalone_vad.h"
#include "webrtc/modules/audio_processing/agc/utility.h"
#include "webrtc/modules/interface/module_common_types.h"
#include "webrtc/system_wrappers/interface/compile_assert.h"
namespace webrtc {
namespace {
const int kDefaultLevelDbfs = -18;
const double kDefaultVoiceValue = 1.0;
const int kNumAnalysisFrames = 100;
const double kActivityThreshold = 0.3;
} // namespace
Agc::Agc()
: target_level_loudness_(Dbfs2Loudness(kDefaultLevelDbfs)),
last_voice_probability_(kDefaultVoiceValue),
target_level_dbfs_(kDefaultLevelDbfs),
standalone_vad_enabled_(true),
histogram_(Histogram::Create(kNumAnalysisFrames)),
inactive_histogram_(Histogram::Create()),
audio_processing_(new AgcAudioProc()),
pitch_based_vad_(new PitchBasedVad()),
standalone_vad_(StandaloneVad::Create()),
// Initialize to the most common resampling situation.
resampler_(new Resampler(32000, kSampleRateHz, kResamplerSynchronous)) {
}
Agc::~Agc() {}
float Agc::AnalyzePreproc(const int16_t* audio, int length) {
assert(length > 0);
int num_clipped = 0;
for (int i = 0; i < length; ++i) {
if (audio[i] == 32767 || audio[i] == -32768)
++num_clipped;
}
return 1.0f * num_clipped / length;
}
int Agc::Process(const int16_t* audio, int length, int sample_rate_hz) {
assert(length == sample_rate_hz / 100);
if (sample_rate_hz > 32000) {
return -1;
}
// Resample to the required rate.
int16_t resampled[kLength10Ms];
const int16_t* resampled_ptr = audio;
if (sample_rate_hz != kSampleRateHz) {
if (resampler_->ResetIfNeeded(sample_rate_hz,
kSampleRateHz,
kResamplerSynchronous) != 0) {
return -1;
}
resampler_->Push(audio, length, resampled, kLength10Ms, length);
resampled_ptr = resampled;
}
assert(length == kLength10Ms);
if (standalone_vad_enabled_) {
if (standalone_vad_->AddAudio(resampled_ptr, length) != 0)
return -1;
}
AudioFeatures features;
audio_processing_->ExtractFeatures(resampled_ptr, length, &features);
if (features.num_frames > 0) {
if (features.silence) {
// The other features are invalid, so update the histogram with an
// arbitrary low value.
for (int n = 0; n < features.num_frames; ++n)
histogram_->Update(features.rms[n], 0.01);
return 0;
}
// Initialize to 0.5 which is a neutral value for combining probabilities,
// in case the standalone-VAD is not enabled.
double p_combined[] = {0.5, 0.5, 0.5, 0.5};
COMPILE_ASSERT(sizeof(p_combined) / sizeof(p_combined[0]) == kMaxNumFrames,
combined_probability_incorrect_size);
if (standalone_vad_enabled_) {
if (standalone_vad_->GetActivity(p_combined, kMaxNumFrames) < 0)
return -1;
}
// If any other VAD is enabled it must be combined before calling the
// pitch-based VAD.
if (pitch_based_vad_->VoicingProbability(features, p_combined) < 0)
return -1;
for (int n = 0; n < features.num_frames; n++) {
histogram_->Update(features.rms[n], p_combined[n]);
last_voice_probability_ = p_combined[n];
}
}
return 0;
}
bool Agc::GetRmsErrorDb(int* error) {
if (!error) {
assert(false);
return false;
}
if (histogram_->num_updates() < kNumAnalysisFrames) {
// We haven't yet received enough frames.
return false;
}
if (histogram_->AudioContent() < kNumAnalysisFrames * kActivityThreshold) {
// We are likely in an inactive segment.
return false;
}
double loudness = Linear2Loudness(histogram_->CurrentRms());
*error = std::floor(Loudness2Db(target_level_loudness_ - loudness) + 0.5);
histogram_->Reset();
return true;
}
void Agc::Reset() {
histogram_->Reset();
}
int Agc::set_target_level_dbfs(int level) {
// TODO(turajs): just some arbitrary sanity check. We can come up with better
// limits. The upper limit should be chosen such that the risk of clipping is
// low. The lower limit should not result in a too quiet signal.
if (level >= 0 || level <= -100)
return -1;
target_level_dbfs_ = level;
target_level_loudness_ = Dbfs2Loudness(level);
return 0;
}
void Agc::EnableStandaloneVad(bool enable) {
standalone_vad_enabled_ = enable;
}
} // namespace webrtc

View File

@ -0,0 +1,69 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_H_
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
#include "webrtc/typedefs.h"
namespace webrtc {
class AudioFrame;
class AgcAudioProc;
class Histogram;
class PitchBasedVad;
class Resampler;
class StandaloneVad;
class Agc {
public:
Agc();
virtual ~Agc();
// Returns the proportion of samples in the buffer which are at full-scale
// (and presumably clipped).
virtual float AnalyzePreproc(const int16_t* audio, int length);
// |audio| must be mono; in a multi-channel stream, provide the first (usually
// left) channel.
virtual int Process(const int16_t* audio, int length, int sample_rate_hz);
// Retrieves the difference between the target RMS level and the current
// signal RMS level in dB. Returns true if an update is available and false
// otherwise, in which case |error| should be ignored and no action taken.
virtual bool GetRmsErrorDb(int* error);
virtual void Reset();
virtual int set_target_level_dbfs(int level);
virtual int target_level_dbfs() const { return target_level_dbfs_; }
virtual void EnableStandaloneVad(bool enable);
virtual bool standalone_vad_enabled() const {
return standalone_vad_enabled_;
}
virtual double voice_probability() const { return last_voice_probability_; }
private:
double target_level_loudness_;
double last_voice_probability_;
int target_level_dbfs_;
bool standalone_vad_enabled_;
scoped_ptr<Histogram> histogram_;
scoped_ptr<Histogram> inactive_histogram_;
scoped_ptr<AgcAudioProc> audio_processing_;
scoped_ptr<PitchBasedVad> pitch_based_vad_;
scoped_ptr<StandaloneVad> standalone_vad_;
scoped_ptr<Resampler> resampler_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_H_
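
For orientation, a minimal sketch of how this interface is meant to be driven: 10 ms mono frames go to Process(), and GetRmsErrorDb() is polled for level updates. ReadFrame() is a hypothetical input helper, and the 16 kHz rate is an assumption for the example.

#include "webrtc/modules/audio_processing/agc/agc.h"

// Hypothetical helper, not part of WebRTC: fills |frame| with 10 ms of mono
// capture audio and returns false at end of input.
bool ReadFrame(int16_t* frame, int length);

void DriveAgc(webrtc::Agc* agc) {
  const int kRateHz = 16000;            // Assumed capture rate.
  const int kFrameLen = kRateHz / 100;  // Process() expects 10 ms frames.
  int16_t frame[kFrameLen];
  while (ReadFrame(frame, kFrameLen)) {
    if (agc->Process(frame, kFrameLen, kRateHz) != 0)
      break;  // Rates above 32 kHz and resampler failures are rejected.
    int error_db = 0;
    if (agc->GetRmsErrorDb(&error_db)) {
      // A fresh RMS error estimate is available; a caller would map it to a
      // microphone level or compression gain change here.
    }
  }
}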

View File

@ -0,0 +1,270 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/agc/agc_audio_proc.h"
#include <math.h>
#include <stdio.h>
#include "webrtc/modules/audio_processing/agc/agc_audio_proc_internal.h"
#include "webrtc/modules/audio_processing/agc/pitch_internal.h"
#include "webrtc/modules/audio_processing/agc/pole_zero_filter.h"
extern "C" {
#include "webrtc/modules/audio_coding/codecs/isac/main/source/codec.h"
#include "webrtc/modules/audio_coding/codecs/isac/main/source/lpc_analysis.h"
#include "webrtc/modules/audio_coding/codecs/isac/main/source/pitch_estimator.h"
#include "webrtc/modules/audio_coding/codecs/isac/main/source/structs.h"
#include "webrtc/modules/audio_processing/utility/fft4g.h"
}
#include "webrtc/modules/interface/module_common_types.h"
#include "webrtc/system_wrappers/interface/compile_assert.h"
namespace webrtc {
// The following structures are declared anonymous in iSAC's structs.h. To
// forward declare them, we use this derived class trick.
struct AgcAudioProc::PitchAnalysisStruct : public ::PitchAnalysisStruct {};
struct AgcAudioProc::PreFiltBankstr : public ::PreFiltBankstr {};
static const float kFrequencyResolution = kSampleRateHz /
static_cast<float>(AgcAudioProc::kDftSize);
static const int kSilenceRms = 5;
// TODO(turajs): Make a Create or Init for AgcAudioProc.
AgcAudioProc::AgcAudioProc()
: audio_buffer_(),
num_buffer_samples_(kNumPastSignalSamples),
log_old_gain_(-2),
old_lag_(50), // Arbitrary but valid as pitch-lag (in samples).
pitch_analysis_handle_(new PitchAnalysisStruct),
pre_filter_handle_(new PreFiltBankstr),
high_pass_filter_(PoleZeroFilter::Create(
kCoeffNumerator, kFilterOrder, kCoeffDenominator, kFilterOrder)) {
COMPILE_ASSERT(kNumPastSignalSamples + kNumSubframeSamples ==
sizeof(kLpcAnalWin) / sizeof(kLpcAnalWin[0]),
lpc_analysis_window_incorrect_size);
COMPILE_ASSERT(kLpcOrder + 1 == sizeof(kCorrWeight) / sizeof(kCorrWeight[0]),
correlation_weight_incorrect_size);
// TODO(turajs): Are we doing too much in the constructor?
float data[kDftSize];
// Make FFT to initialize.
ip_[0] = 0;
WebRtc_rdft(kDftSize, 1, data, ip_, w_fft_);
// TODO(turajs): Need to initialize high-pass filter.
// Initialize iSAC components.
WebRtcIsac_InitPreFilterbank(pre_filter_handle_.get());
WebRtcIsac_InitPitchAnalysis(pitch_analysis_handle_.get());
}
AgcAudioProc::~AgcAudioProc() {}
void AgcAudioProc::ResetBuffer() {
memcpy(audio_buffer_, &audio_buffer_[kNumSamplesToProcess],
sizeof(audio_buffer_[0]) * kNumPastSignalSamples);
num_buffer_samples_ = kNumPastSignalSamples;
}
int AgcAudioProc::ExtractFeatures(const int16_t* frame,
int length,
AudioFeatures* features) {
features->num_frames = 0;
if (length != kNumSubframeSamples) {
return -1;
}
// High-pass filter to remove the DC component and very low-frequency content.
// In our experience, this high-pass filtering improves voiced/non-voiced
// classification.
if (high_pass_filter_->Filter(frame, kNumSubframeSamples,
&audio_buffer_[num_buffer_samples_]) != 0) {
return -1;
}
num_buffer_samples_ += kNumSubframeSamples;
if (num_buffer_samples_ < kBufferLength) {
return 0;
}
assert(num_buffer_samples_ == kBufferLength);
features->num_frames = kNum10msSubframes;
features->silence = false;
Rms(features->rms, kMaxNumFrames);
for (int i = 0; i < kNum10msSubframes; ++i) {
if (features->rms[i] < kSilenceRms) {
// PitchAnalysis can cause NaNs in the pitch gain if it's fed silence.
// Bail out here instead.
features->silence = true;
ResetBuffer();
return 0;
}
}
PitchAnalysis(features->log_pitch_gain, features->pitch_lag_hz,
kMaxNumFrames);
FindFirstSpectralPeaks(features->spectral_peak, kMaxNumFrames);
ResetBuffer();
return 0;
}
// Computes |kLpcOrder + 1| correlation coefficients.
void AgcAudioProc::SubframeCorrelation(double* corr, int length_corr,
int subframe_index) {
assert(length_corr >= kLpcOrder + 1);
double windowed_audio[kNumSubframeSamples + kNumPastSignalSamples];
int buffer_index = subframe_index * kNumSubframeSamples;
for (int n = 0; n < kNumSubframeSamples + kNumPastSignalSamples; n++)
windowed_audio[n] = audio_buffer_[buffer_index++] * kLpcAnalWin[n];
WebRtcIsac_AutoCorr(corr, windowed_audio, kNumSubframeSamples +
kNumPastSignalSamples, kLpcOrder);
}
// Compute |kNum10msSubframes| sets of LPC coefficients, one per 10 ms input.
// The analysis window is 15 ms long and it is centered on the first half of
// each 10 ms sub-frame. This is equivalent to computing LPC coefficients for
// the first half of each 10 ms sub-frame.
void AgcAudioProc::GetLpcPolynomials(double* lpc, int length_lpc) {
assert(length_lpc >= kNum10msSubframes * (kLpcOrder + 1));
double corr[kLpcOrder + 1];
double reflec_coeff[kLpcOrder];
for (int i = 0, offset_lpc = 0; i < kNum10msSubframes;
i++, offset_lpc += kLpcOrder + 1) {
SubframeCorrelation(corr, kLpcOrder + 1, i);
corr[0] *= 1.0001;  // This makes Lev-Durb a bit more stable.
for (int k = 0; k < kLpcOrder + 1; k++) {
corr[k] *= kCorrWeight[k];
}
WebRtcIsac_LevDurb(&lpc[offset_lpc], reflec_coeff, corr, kLpcOrder);
}
}
// Fit a second order curve to these 3 points and find the location of the
// extremum. The points are inverted before curve fitting.
static float QuadraticInterpolation(float prev_val, float curr_val,
float next_val) {
// Doing the interpolation in |1 / A(z)|^2.
float fractional_index = 0;
next_val = 1.0f / next_val;
prev_val = 1.0f / prev_val;
curr_val = 1.0f / curr_val;
fractional_index = -(next_val - prev_val) * 0.5f / (next_val + prev_val -
2.f * curr_val);
assert(fabs(fractional_index) < 1);
return fractional_index;
}
// 1 / A(z), where A(z) is defined by |lpc|, is a model of the spectral
// envelope of the input signal. The local maximum of the spectral envelope
// corresponds to the local minimum of A(z), so searching for that minimum
// saves one inversion. Furthermore, we find the first local maximum of the
// magnitude squared, to save one square root.
void AgcAudioProc::FindFirstSpectralPeaks(double* f_peak, int length_f_peak) {
assert(length_f_peak >= kNum10msSubframes);
double lpc[kNum10msSubframes * (kLpcOrder + 1)];
// For all sub-frames.
GetLpcPolynomials(lpc, kNum10msSubframes * (kLpcOrder + 1));
const int kNumDftCoefficients = kDftSize / 2 + 1;
float data[kDftSize];
for (int i = 0; i < kNum10msSubframes; i++) {
// Convert to float with zero pad.
memset(data, 0, sizeof(data));
for (int n = 0; n < kLpcOrder + 1; n++) {
data[n] = static_cast<float>(lpc[i * (kLpcOrder + 1) + n]);
}
// Transform to frequency domain.
WebRtc_rdft(kDftSize, 1, data, ip_, w_fft_);
int index_peak = 0;
float prev_magn_sqr = data[0] * data[0];
float curr_magn_sqr = data[2] * data[2] + data[3] * data[3];
float next_magn_sqr;
bool found_peak = false;
for (int n = 2; n < kNumDftCoefficients - 1; n++) {
next_magn_sqr = data[2 * n] * data[2 * n] +
data[2 * n + 1] * data[2 * n + 1];
if (curr_magn_sqr < prev_magn_sqr && curr_magn_sqr < next_magn_sqr) {
found_peak = true;
index_peak = n - 1;
break;
}
prev_magn_sqr = curr_magn_sqr;
curr_magn_sqr = next_magn_sqr;
}
float fractional_index = 0;
if (!found_peak) {
// Checking if |kNumDftCoefficients - 1| is the local minimum.
next_magn_sqr = data[1] * data[1];
if (curr_magn_sqr < prev_magn_sqr && curr_magn_sqr < next_magn_sqr) {
index_peak = kNumDftCoefficients - 1;
}
} else {
// A peak is found, do a simple quadratic interpolation to get a more
// accurate estimate of the peak location.
fractional_index = QuadraticInterpolation(prev_magn_sqr, curr_magn_sqr,
next_magn_sqr);
}
f_peak[i] = (index_peak + fractional_index) * kFrequencyResolution;
}
}
// Using iSAC functions to estimate pitch gains & lags.
void AgcAudioProc::PitchAnalysis(double* log_pitch_gains, double* pitch_lags_hz,
int length) {
// TODO(turajs): This can be "imported" from iSAC & and the next two
// constants.
assert(length >= kNum10msSubframes);
const int kNumPitchSubframes = 4;
double gains[kNumPitchSubframes];
double lags[kNumPitchSubframes];
const int kNumSubbandFrameSamples = 240;
const int kNumLookaheadSamples = 24;
float lower[kNumSubbandFrameSamples];
float upper[kNumSubbandFrameSamples];
double lower_lookahead[kNumSubbandFrameSamples];
double upper_lookahead[kNumSubbandFrameSamples];
double lower_lookahead_pre_filter[kNumSubbandFrameSamples +
kNumLookaheadSamples];
// Split the signal into lower and upper bands.
WebRtcIsac_SplitAndFilterFloat(&audio_buffer_[kNumPastSignalSamples],
lower, upper, lower_lookahead, upper_lookahead,
pre_filter_handle_.get());
WebRtcIsac_PitchAnalysis(lower_lookahead, lower_lookahead_pre_filter,
pitch_analysis_handle_.get(), lags, gains);
// Lags are computed on lower-band signal with sampling rate half of the
// input signal.
GetSubframesPitchParameters(kSampleRateHz / 2, gains, lags,
kNumPitchSubframes, kNum10msSubframes,
&log_old_gain_, &old_lag_,
log_pitch_gains, pitch_lags_hz);
}
void AgcAudioProc::Rms(double* rms, int length_rms) {
assert(length_rms >= kNum10msSubframes);
int offset = kNumPastSignalSamples;
for (int i = 0; i < kNum10msSubframes; i++) {
rms[i] = 0;
for (int n = 0; n < kNumSubframeSamples; n++, offset++)
rms[i] += audio_buffer_[offset] * audio_buffer_[offset];
rms[i] = sqrt(rms[i] / kNumSubframeSamples);
}
}
} // namespace webrtc
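
The three-point fit in QuadraticInterpolation() above finds the vertex of a parabola through the inverted samples. A standalone sanity check, restating the formula with an assumed test parabola (not part of this change):

#include <cassert>
#include <cmath>
#include <cstdio>

// Same inversion and fit as QuadraticInterpolation() above.
static float FitVertex(float prev_val, float curr_val, float next_val) {
  next_val = 1.0f / next_val;
  prev_val = 1.0f / prev_val;
  curr_val = 1.0f / curr_val;
  return -(next_val - prev_val) * 0.5f /
      (next_val + prev_val - 2.f * curr_val);
}

int main() {
  // Envelope p(x) = 5 - (x - 0.25)^2 peaks at x = 0.25. The caller hands in
  // |A(z)|^2 samples, i.e. the reciprocal of the envelope, at x = -1, 0, 1.
  const float d = 0.25f;
  const float p_prev = 5.f - (-1.f - d) * (-1.f - d);
  const float p_curr = 5.f - (0.f - d) * (0.f - d);
  const float p_next = 5.f - (1.f - d) * (1.f - d);
  const float offset = FitVertex(1.f / p_prev, 1.f / p_curr, 1.f / p_next);
  assert(std::fabs(offset - d) < 1e-4f);  // The vertex offset is recovered.
  printf("recovered vertex offset = %f\n", offset);
  return 0;
}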

View File

@ -0,0 +1,83 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_AUDIO_PROC_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_AUDIO_PROC_H_
#include "webrtc/modules/audio_processing/agc/common.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
#include "webrtc/typedefs.h"
namespace webrtc {
class AudioFrame;
class PoleZeroFilter;
class AgcAudioProc {
public:
// Forward declare iSAC structs.
struct PitchAnalysisStruct;
struct PreFiltBankstr;
AgcAudioProc();
~AgcAudioProc();
int ExtractFeatures(const int16_t* audio_frame,
int length,
AudioFeatures* audio_features);
static const int kDftSize = 512;
private:
void PitchAnalysis(double* pitch_gains, double* pitch_lags_hz, int length);
void SubframeCorrelation(double* corr, int length_corr, int subframe_index);
void GetLpcPolynomials(double* lpc, int length_lpc);
void FindFirstSpectralPeaks(double* f_peak, int length_f_peak);
void Rms(double* rms, int length_rms);
void ResetBuffer();
// To compute the spectral peak we perform LPC analysis to get the spectral
// envelope. For every 30 ms we compute 3 spectral peaks, and therefore run 3
// LPC analyses. Each LPC is computed over 15 ms of windowed audio: for every
// 10 ms sub-frame we need 5 ms of past signal to form the input of the LPC
// analysis.
static const int kNumPastSignalSamples = kSampleRateHz / 200;
// TODO(turajs): maybe define this at a higher level (maybe as an enum) so
// that all the code recognizes it as "no-error."
static const int kNoError = 0;
static const int kNum10msSubframes = 3;
static const int kNumSubframeSamples = kSampleRateHz / 100;
static const int kNumSamplesToProcess = kNum10msSubframes *
kNumSubframeSamples; // Samples in 30 ms @ given sampling rate.
static const int kBufferLength = kNumPastSignalSamples + kNumSamplesToProcess;
static const int kIpLength = kDftSize >> 1;
static const int kWLength = kDftSize >> 1;
static const int kLpcOrder = 16;
int ip_[kIpLength];
float w_fft_[kWLength];
// A buffer of 5 ms (past audio) + 30 ms (one iSAC frame).
float audio_buffer_[kBufferLength];
int num_buffer_samples_;
double log_old_gain_;
double old_lag_;
scoped_ptr<PitchAnalysisStruct> pitch_analysis_handle_;
scoped_ptr<PreFiltBankstr> pre_filter_handle_;
scoped_ptr<PoleZeroFilter> high_pass_filter_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_AUDIO_PROC_H_
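
The buffer-sizing comments above reduce to simple arithmetic. A quick check, assuming kSampleRateHz is 16000 (it is defined in agc/common.h, which this excerpt does not show; the 240-entry kLpcAnalWin table below is consistent with that value):

#include <cassert>

int main() {
  const int kSampleRateHz = 16000;                        // Assumed.
  const int kNumPastSignalSamples = kSampleRateHz / 200;  // 5 ms  = 80.
  const int kNumSubframeSamples = kSampleRateHz / 100;    // 10 ms = 160.
  const int kNum10msSubframes = 3;
  // One LPC analysis window: 5 ms of history plus one 10 ms sub-frame,
  // i.e. 240 samples, matching the length of kLpcAnalWin.
  assert(kNumPastSignalSamples + kNumSubframeSamples == 240);
  // Full analysis buffer: 5 ms + 30 ms = 560 samples (kBufferLength).
  assert(kNumPastSignalSamples +
         kNum10msSubframes * kNumSubframeSamples == 560);
  return 0;
}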

View File

@ -0,0 +1,81 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_AUDIO_PROC_INTERNAL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_AUDIO_PROC_INTERNAL_H_
#include "webrtc/system_wrappers/interface/compile_assert.h"
namespace webrtc {
// These values should match MATLAB counterparts for unit-tests to pass.
static const double kCorrWeight[] = {
1.000000, 0.985000, 0.970225, 0.955672, 0.941337, 0.927217, 0.913308,
0.899609, 0.886115, 0.872823, 0.859730, 0.846834, 0.834132, 0.821620,
0.809296, 0.797156, 0.785199
};
static const double kLpcAnalWin[] = {
0.00000000, 0.01314436, 0.02628645, 0.03942400, 0.05255473, 0.06567639,
0.07878670, 0.09188339, 0.10496421, 0.11802689, 0.13106918, 0.14408883,
0.15708358, 0.17005118, 0.18298941, 0.19589602, 0.20876878, 0.22160547,
0.23440387, 0.24716177, 0.25987696, 0.27254725, 0.28517045, 0.29774438,
0.31026687, 0.32273574, 0.33514885, 0.34750406, 0.35979922, 0.37203222,
0.38420093, 0.39630327, 0.40833713, 0.42030043, 0.43219112, 0.44400713,
0.45574642, 0.46740697, 0.47898676, 0.49048379, 0.50189608, 0.51322164,
0.52445853, 0.53560481, 0.54665854, 0.55761782, 0.56848075, 0.57924546,
0.58991008, 0.60047278, 0.61093173, 0.62128512, 0.63153117, 0.64166810,
0.65169416, 0.66160761, 0.67140676, 0.68108990, 0.69065536, 0.70010148,
0.70942664, 0.71862923, 0.72770765, 0.73666033, 0.74548573, 0.75418233,
0.76274862, 0.77118312, 0.77948437, 0.78765094, 0.79568142, 0.80357442,
0.81132858, 0.81894256, 0.82641504, 0.83374472, 0.84093036, 0.84797069,
0.85486451, 0.86161063, 0.86820787, 0.87465511, 0.88095122, 0.88709512,
0.89308574, 0.89892206, 0.90460306, 0.91012776, 0.91549520, 0.92070447,
0.92575465, 0.93064488, 0.93537432, 0.93994213, 0.94434755, 0.94858979,
0.95266814, 0.95658189, 0.96033035, 0.96391289, 0.96732888, 0.97057773,
0.97365889, 0.97657181, 0.97931600, 0.98189099, 0.98429632, 0.98653158,
0.98859639, 0.99049038, 0.99221324, 0.99376466, 0.99514438, 0.99635215,
0.99738778, 0.99825107, 0.99894188, 0.99946010, 0.99980562, 0.99997840,
0.99997840, 0.99980562, 0.99946010, 0.99894188, 0.99825107, 0.99738778,
0.99635215, 0.99514438, 0.99376466, 0.99221324, 0.99049038, 0.98859639,
0.98653158, 0.98429632, 0.98189099, 0.97931600, 0.97657181, 0.97365889,
0.97057773, 0.96732888, 0.96391289, 0.96033035, 0.95658189, 0.95266814,
0.94858979, 0.94434755, 0.93994213, 0.93537432, 0.93064488, 0.92575465,
0.92070447, 0.91549520, 0.91012776, 0.90460306, 0.89892206, 0.89308574,
0.88709512, 0.88095122, 0.87465511, 0.86820787, 0.86161063, 0.85486451,
0.84797069, 0.84093036, 0.83374472, 0.82641504, 0.81894256, 0.81132858,
0.80357442, 0.79568142, 0.78765094, 0.77948437, 0.77118312, 0.76274862,
0.75418233, 0.74548573, 0.73666033, 0.72770765, 0.71862923, 0.70942664,
0.70010148, 0.69065536, 0.68108990, 0.67140676, 0.66160761, 0.65169416,
0.64166810, 0.63153117, 0.62128512, 0.61093173, 0.60047278, 0.58991008,
0.57924546, 0.56848075, 0.55761782, 0.54665854, 0.53560481, 0.52445853,
0.51322164, 0.50189608, 0.49048379, 0.47898676, 0.46740697, 0.45574642,
0.44400713, 0.43219112, 0.42030043, 0.40833713, 0.39630327, 0.38420093,
0.37203222, 0.35979922, 0.34750406, 0.33514885, 0.32273574, 0.31026687,
0.29774438, 0.28517045, 0.27254725, 0.25987696, 0.24716177, 0.23440387,
0.22160547, 0.20876878, 0.19589602, 0.18298941, 0.17005118, 0.15708358,
0.14408883, 0.13106918, 0.11802689, 0.10496421, 0.09188339, 0.07878670,
0.06567639, 0.05255473, 0.03942400, 0.02628645, 0.01314436, 0.00000000
};
static const int kFilterOrder = 2;
static const float kCoeffNumerator[kFilterOrder + 1] = {0.974827f, -1.949650f,
0.974827f};
static const float kCoeffDenominator[kFilterOrder + 1] = {1.0f, -1.971999f,
0.972457f};
COMPILE_ASSERT(kFilterOrder + 1 == sizeof(kCoeffNumerator) /
sizeof(kCoeffNumerator[0]), numerator_coefficients_incorrect_size);
COMPILE_ASSERT(kFilterOrder + 1 == sizeof(kCoeffDenominator) /
sizeof(kCoeffDenominator[0]), denominator_coefficients_incorrect_size);
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_AUDIO_PROC_INTERNAL_H_

View File

@ -0,0 +1,61 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// We don't test the value of pitch gain and lags as they are created by iSAC
// routines. However, interpolation of pitch-gain and lags is in a separate
// class and has its own unit-test.
#include "webrtc/modules/audio_processing/agc/agc_audio_proc.h"
#include <math.h>
#include <stdio.h>
#include "gtest/gtest.h"
#include "webrtc/modules/audio_processing/agc/common.h"
#include "webrtc/modules/interface/module_common_types.h"
#include "webrtc/test/testsupport/fileutils.h"
namespace webrtc {
TEST(AudioProcessingTest, DISABLED_ComputingFirstSpectralPeak) {
AgcAudioProc audioproc;
std::string peak_file_name =
test::ResourcePath("audio_processing/agc/agc_spectral_peak", "dat");
FILE* peak_file = fopen(peak_file_name.c_str(), "rb");
ASSERT_TRUE(peak_file != NULL);
std::string pcm_file_name =
test::ResourcePath("audio_processing/agc/agc_audio", "pcm");
FILE* pcm_file = fopen(pcm_file_name.c_str(), "rb");
ASSERT_TRUE(pcm_file != NULL);
// Read 10 ms audio in each iteration.
const size_t kDataLength = kLength10Ms;
int16_t data[kDataLength] = { 0 };
AudioFeatures features;
double sp[kMaxNumFrames];
while (fread(data, sizeof(int16_t), kDataLength, pcm_file) == kDataLength) {
audioproc.ExtractFeatures(data, kDataLength, &features);
if (features.num_frames > 0) {
ASSERT_LT(features.num_frames, kMaxNumFrames);
// Read reference values.
const size_t num_frames = features.num_frames;
ASSERT_EQ(num_frames, fread(sp, sizeof(sp[0]), num_frames, peak_file));
for (int n = 0; n < features.num_frames; n++)
EXPECT_NEAR(features.spectral_peak[n], sp[n], 3);
}
}
fclose(peak_file);
fclose(pcm_file);
}
} // namespace webrtc

View File

@ -0,0 +1,436 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/agc/agc_manager_direct.h"
#include <cassert>
#include <cmath>
#ifdef WEBRTC_AGC_DEBUG_DUMP
#include <cstdio>
#endif
#include "webrtc/modules/audio_processing/agc/gain_map_internal.h"
#include "webrtc/modules/audio_processing/gain_control_impl.h"
#include "webrtc/modules/interface/module_common_types.h"
#include "webrtc/system_wrappers/interface/compile_assert.h"
#include "webrtc/system_wrappers/interface/logging.h"
namespace webrtc {
namespace {
// Lowest level the microphone can be lowered to due to clipping.
const int kClippedLevelMin = 170;
// Amount the microphone level is lowered with every clipping event.
const int kClippedLevelStep = 15;
// Proportion of clipped samples required to declare a clipping event.
const float kClippedRatioThreshold = 0.1f;
// Time in frames to wait after a clipping event before checking again.
const int kClippedWaitFrames = 300;
// Amount of error we tolerate in the microphone level (presumably due to OS
// quantization) before we assume the user has manually adjusted the microphone.
const int kLevelQuantizationSlack = 25;
const int kDefaultCompressionGain = 7;
const int kMaxCompressionGain = 12;
const int kMinCompressionGain = 2;
// Controls the rate of compression changes towards the target.
const float kCompressionGainStep = 0.05f;
const int kMaxMicLevel = 255;
COMPILE_ASSERT(kGainMapSize > kMaxMicLevel, gain_map_too_small);
const int kMinMicLevel = 12;
const int kMinInitMicLevel = 85;
// Prevent very large microphone level changes.
const int kMaxResidualGainChange = 15;
// Maximum additional gain allowed to compensate for microphone level
// restrictions from clipping events.
const int kSurplusCompressionGain = 6;
int LevelFromGainError(int gain_error, int level) {
assert(level >= 0 && level <= kMaxMicLevel);
if (gain_error == 0) {
return level;
}
// TODO(ajm): Could be made more efficient with a binary search.
int new_level = level;
if (gain_error > 0) {
while (kGainMap[new_level] - kGainMap[level] < gain_error &&
new_level < kMaxMicLevel) {
++new_level;
}
} else {
while (kGainMap[new_level] - kGainMap[level] > gain_error &&
new_level > kMinMicLevel) {
--new_level;
}
}
return new_level;
}
} // namespace
// Facility for dumping debug audio files. All methods are no-ops in the
// default case where WEBRTC_AGC_DEBUG_DUMP is undefined.
class DebugFile {
#ifdef WEBRTC_AGC_DEBUG_DUMP
public:
explicit DebugFile(const char* filename)
: file_(fopen(filename, "wb")) {
assert(file_);
}
~DebugFile() {
fclose(file_);
}
void Write(const int16_t* data, int length_samples) {
fwrite(data, 1, length_samples * sizeof(int16_t), file_);
}
private:
FILE* file_;
#else
public:
explicit DebugFile(const char* filename) {
}
~DebugFile() {
}
void Write(const int16_t* data, int length_samples) {
}
#endif // WEBRTC_AGC_DEBUG_DUMP
};
AgcManagerDirect::AgcManagerDirect(GainControl* gctrl,
VolumeCallbacks* volume_callbacks)
: agc_(new Agc()),
gctrl_(gctrl),
volume_callbacks_(volume_callbacks),
frames_since_clipped_(kClippedWaitFrames),
level_(0),
max_level_(kMaxMicLevel),
max_compression_gain_(kMaxCompressionGain),
target_compression_(kDefaultCompressionGain),
compression_(target_compression_),
compression_accumulator_(compression_),
capture_muted_(false),
check_volume_on_next_process_(true), // Check at startup.
startup_(true),
file_preproc_(new DebugFile("agc_preproc.pcm")),
file_postproc_(new DebugFile("agc_postproc.pcm")) {
}
AgcManagerDirect::AgcManagerDirect(Agc* agc,
GainControl* gctrl,
VolumeCallbacks* volume_callbacks)
: agc_(agc),
gctrl_(gctrl),
volume_callbacks_(volume_callbacks),
frames_since_clipped_(kClippedWaitFrames),
level_(0),
max_level_(kMaxMicLevel),
max_compression_gain_(kMaxCompressionGain),
target_compression_(kDefaultCompressionGain),
compression_(target_compression_),
compression_accumulator_(compression_),
capture_muted_(false),
check_volume_on_next_process_(true), // Check at startup.
startup_(true),
file_preproc_(new DebugFile("agc_preproc.pcm")),
file_postproc_(new DebugFile("agc_postproc.pcm")) {
}
AgcManagerDirect::~AgcManagerDirect() {}
int AgcManagerDirect::Initialize() {
max_level_ = kMaxMicLevel;
max_compression_gain_ = kMaxCompressionGain;
target_compression_ = kDefaultCompressionGain;
compression_ = target_compression_;
compression_accumulator_ = compression_;
capture_muted_ = false;
check_volume_on_next_process_ = true;
// TODO(bjornv): Investigate if we need to reset |startup_| as well. For
// example, what happens when we change devices.
if (gctrl_->set_mode(GainControl::kFixedDigital) != 0) {
LOG_FERR1(LS_ERROR, set_mode, GainControl::kFixedDigital);
return -1;
}
if (gctrl_->set_target_level_dbfs(2) != 0) {
LOG_FERR1(LS_ERROR, set_target_level_dbfs, 2);
return -1;
}
if (gctrl_->set_compression_gain_db(kDefaultCompressionGain) != 0) {
LOG_FERR1(LS_ERROR, set_compression_gain_db, kDefaultCompressionGain);
return -1;
}
if (gctrl_->enable_limiter(true) != 0) {
LOG_FERR1(LS_ERROR, enable_limiter, true);
return -1;
}
return 0;
}
void AgcManagerDirect::AnalyzePreProcess(int16_t* audio,
int num_channels,
int samples_per_channel) {
int length = num_channels * samples_per_channel;
if (capture_muted_) {
return;
}
file_preproc_->Write(audio, length);
if (frames_since_clipped_ < kClippedWaitFrames) {
++frames_since_clipped_;
return;
}
// Check for clipped samples, as the AGC has difficulty detecting pitch
// under clipping distortion. We do this in the preprocessing phase in order
// to catch clipped echo as well.
//
// If we find a sufficiently clipped frame, drop the current microphone level
// and enforce a new maximum level, lowered by the same amount from the
// current maximum. This harsh treatment is an effort to avoid repeated
// clipped echo events. As compensation for this restriction, the maximum
// compression gain is increased, through SetMaxLevel().
float clipped_ratio = agc_->AnalyzePreproc(audio, length);
if (clipped_ratio > kClippedRatioThreshold) {
LOG(LS_INFO) << "[agc] Clipping detected. clipped_ratio="
<< clipped_ratio;
// Always decrease the maximum level, even if the current level is below
// threshold.
SetMaxLevel(std::max(kClippedLevelMin, max_level_ - kClippedLevelStep));
if (level_ > kClippedLevelMin) {
// Don't try to adjust the level if we're already below the limit. As
// a consequence, if the user has brought the level above the limit, we
// will still not react until the postproc updates the level.
SetLevel(std::max(kClippedLevelMin, level_ - kClippedLevelStep));
// Reset the AGC since the level has changed.
agc_->Reset();
}
frames_since_clipped_ = 0;
}
}
void AgcManagerDirect::Process(const int16_t* audio,
int length,
int sample_rate_hz) {
if (capture_muted_) {
return;
}
if (check_volume_on_next_process_) {
check_volume_on_next_process_ = false;
// We have to wait until the first process call to check the volume,
// because Chromium doesn't guarantee it to be valid any earlier.
CheckVolumeAndReset();
}
if (agc_->Process(audio, length, sample_rate_hz) != 0) {
LOG_FERR0(LS_ERROR, Agc::Process);
assert(false);
}
UpdateGain();
UpdateCompressor();
file_postproc_->Write(audio, length);
}
void AgcManagerDirect::SetLevel(int new_level) {
int voe_level = volume_callbacks_->GetMicVolume();
if (voe_level < 0) {
return;
}
if (voe_level == 0) {
LOG(LS_INFO) << "[agc] VolumeCallbacks returned level=0, taking no action.";
return;
}
if (voe_level > kMaxMicLevel) {
LOG(LS_ERROR) << "VolumeCallbacks returned an invalid level=" << voe_level;
return;
}
if (voe_level > level_ + kLevelQuantizationSlack ||
voe_level < level_ - kLevelQuantizationSlack) {
LOG(LS_INFO) << "[agc] Mic volume was manually adjusted. Updating "
<< "stored level from " << level_ << " to " << voe_level;
level_ = voe_level;
// Always allow the user to increase the volume.
if (level_ > max_level_) {
SetMaxLevel(level_);
}
// Take no action in this case, since we can't be sure when the volume
// was manually adjusted. The compressor will still provide some of the
// desired gain change.
agc_->Reset();
return;
}
new_level = std::min(new_level, max_level_);
if (new_level == level_) {
return;
}
volume_callbacks_->SetMicVolume(new_level);
LOG(LS_INFO) << "[agc] voe_level=" << voe_level << ", "
<< "level_=" << level_ << ", "
<< "new_level=" << new_level;
level_ = new_level;
}
void AgcManagerDirect::SetMaxLevel(int level) {
assert(level >= kClippedLevelMin);
max_level_ = level;
// Scale the |kSurplusCompressionGain| linearly across the restricted
// level range.
max_compression_gain_ = kMaxCompressionGain + std::floor(
(1.f * kMaxMicLevel - max_level_) / (kMaxMicLevel - kClippedLevelMin) *
kSurplusCompressionGain + 0.5f);
LOG(LS_INFO) << "[agc] max_level_=" << max_level_
<< ", max_compression_gain_=" << max_compression_gain_;
}
void AgcManagerDirect::SetCaptureMuted(bool muted) {
if (capture_muted_ == muted) {
return;
}
capture_muted_ = muted;
if (!muted) {
// When we unmute, we should reset things to be safe.
check_volume_on_next_process_ = true;
}
}
float AgcManagerDirect::voice_probability() {
return static_cast<float>(agc_->voice_probability());
}
int AgcManagerDirect::CheckVolumeAndReset() {
int level = volume_callbacks_->GetMicVolume();
if (level < 0) {
return -1;
}
// Reasons for taking action at startup:
// 1) A person starting a call is expected to be heard.
// 2) Independent of the interpretation of |level| == 0, we should raise it so
// the AGC can do its job properly.
if (level == 0 && !startup_) {
LOG(LS_INFO) << "[agc] VolumeCallbacks returned level=0, taking no action.";
return 0;
}
if (level > kMaxMicLevel) {
LOG(LS_ERROR) << "VolumeCallbacks returned an invalid level=" << level;
return -1;
}
LOG(LS_INFO) << "[agc] Initial GetMicVolume()=" << level;
int minLevel = startup_ ? kMinInitMicLevel : kMinMicLevel;
if (level < minLevel) {
level = minLevel;
LOG(LS_INFO) << "[agc] Initial volume too low, raising to " << level;
volume_callbacks_->SetMicVolume(level);
}
agc_->Reset();
level_ = level;
startup_ = false;
return 0;
}
// Requests the RMS error from AGC and distributes the required gain change
// between the digital compression stage and volume slider. We use the
// compressor first, providing a slack region around the current slider
// position to reduce movement.
//
// If the slider needs to be moved, we check first if the user has adjusted
// it, in which case we take no action and cache the updated level.
void AgcManagerDirect::UpdateGain() {
int rms_error = 0;
if (!agc_->GetRmsErrorDb(&rms_error)) {
// No error update ready.
return;
}
// The compressor will always add at least kMinCompressionGain. In effect,
// this adjusts our target gain upward by the same amount and rms_error
// needs to reflect that.
rms_error += kMinCompressionGain;
// Handle as much error as possible with the compressor first.
int raw_compression = std::max(std::min(rms_error, max_compression_gain_),
kMinCompressionGain);
// Deemphasize the compression gain error. Move halfway between the current
// target and the newly received target. This serves to soften perceptible
// intra-talkspurt adjustments, at the cost of some adaptation speed.
if ((raw_compression == max_compression_gain_ &&
target_compression_ == max_compression_gain_ - 1) ||
(raw_compression == kMinCompressionGain &&
target_compression_ == kMinCompressionGain + 1)) {
// Special case to allow the target to reach the endpoints of the
// compression range. The deemphasis would otherwise halt it at 1 dB shy.
target_compression_ = raw_compression;
} else {
target_compression_ = (raw_compression - target_compression_) / 2
+ target_compression_;
}
// Residual error will be handled by adjusting the volume slider. Use the
// raw rather than deemphasized compression here as we would otherwise
// shrink the amount of slack the compressor provides.
int residual_gain = rms_error - raw_compression;
residual_gain = std::min(std::max(residual_gain, -kMaxResidualGainChange),
kMaxResidualGainChange);
LOG(LS_INFO) << "[agc] rms_error=" << rms_error << ", "
<< "target_compression=" << target_compression_ << ", "
<< "residual_gain=" << residual_gain;
if (residual_gain == 0)
return;
SetLevel(LevelFromGainError(residual_gain, level_));
}
void AgcManagerDirect::UpdateCompressor() {
if (compression_ == target_compression_) {
return;
}
// Adapt the compression gain slowly towards the target, in order to avoid
// highly perceptible changes.
if (target_compression_ > compression_) {
compression_accumulator_ += kCompressionGainStep;
} else {
compression_accumulator_ -= kCompressionGainStep;
}
// The compressor accepts integer gains in dB. Adjust the gain when
// we've come within half a stepsize of the nearest integer. (We don't
// check for equality due to potential floating point imprecision).
int new_compression = compression_;
int nearest_neighbor = std::floor(compression_accumulator_ + 0.5);
if (std::fabs(compression_accumulator_ - nearest_neighbor) <
kCompressionGainStep / 2) {
new_compression = nearest_neighbor;
}
// Set the new compression gain.
if (new_compression != compression_) {
compression_ = new_compression;
compression_accumulator_ = new_compression;
if (gctrl_->set_compression_gain_db(compression_) != 0) {
LOG_FERR1(LS_ERROR, set_compression_gain_db, compression_);
}
}
}
} // namespace webrtc
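
The deemphasis and snapping in UpdateCompressor() can be seen in isolation. A standalone sketch with the same kCompressionGainStep and assumed start/target gains, showing the accumulator creeping toward the target and committing an integer change roughly every 20 frames (0.2 s):

#include <cmath>
#include <cstdio>

int main() {
  const float kCompressionGainStep = 0.05f;  // Same constant as above.
  int compression = 7;                       // Assumed current gain (dB).
  float accumulator = compression;
  const int target = 9;                      // Assumed target_compression_.
  for (int frame = 0; compression != target; ++frame) {
    // Creep toward the target by one step per 10 ms frame.
    accumulator += (target > compression) ? kCompressionGainStep
                                          : -kCompressionGainStep;
    // Snap to the nearest integer once within half a step of it.
    const int nearest = static_cast<int>(std::floor(accumulator + 0.5f));
    if (std::fabs(accumulator - nearest) < kCompressionGainStep / 2 &&
        nearest != compression) {
      compression = nearest;
      accumulator = nearest;
      printf("frame %d: compression -> %d dB\n", frame, compression);
    }
  }
  return 0;
}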

View File

@ -0,0 +1,98 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_
#include "webrtc/modules/audio_processing/agc/agc.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
namespace webrtc {
class AudioFrame;
class DebugFile;
class GainControl;
// Callbacks that need to be injected into AgcManagerDirect to read and control
// the volume values. They have different behavior if they are called from
// AgcManager or AudioProcessing. This is done to remove the VoiceEngine
// dependency in AgcManagerDirect.
class VolumeCallbacks {
public:
virtual ~VolumeCallbacks() {}
virtual void SetMicVolume(int volume) = 0;
virtual int GetMicVolume() = 0;
};
// Direct interface to use AGC to set volume and compression values.
// AudioProcessing uses this interface directly to integrate the callback-less
// AGC. AgcManager delegates most of its calls here. See agc_manager.h for
// undocumented methods.
//
// This class is not thread-safe.
class AgcManagerDirect {
public:
// AgcManagerDirect will configure GainControl internally. The user is
// responsible for processing the audio using it after the call to Process.
AgcManagerDirect(GainControl* gctrl, VolumeCallbacks* volume_callbacks);
// Dependency injection for testing. Don't delete |agc| as the memory is owned
// by the manager.
AgcManagerDirect(Agc* agc,
GainControl* gctrl,
VolumeCallbacks* volume_callbacks);
~AgcManagerDirect();
int Initialize();
void AnalyzePreProcess(int16_t* audio,
int num_channels,
int samples_per_channel);
void Process(const int16_t* audio, int length, int sample_rate_hz);
// Sets a new microphone level, after first checking that it hasn't been
// updated by the user, in which case no action is taken.
void SetLevel(int new_level);
// Set the maximum level the AGC is allowed to apply. Also updates the
// maximum compression gain to compensate. The level must be at least
// |kClippedLevelMin|.
void SetMaxLevel(int level);
void SetCaptureMuted(bool muted);
bool capture_muted() { return capture_muted_; }
float voice_probability();
private:
int CheckVolumeAndReset();
void UpdateGain();
void UpdateCompressor();
scoped_ptr<Agc> agc_;
GainControl* gctrl_;
VolumeCallbacks* volume_callbacks_;
int frames_since_clipped_;
int level_;
int max_level_;
int max_compression_gain_;
int target_compression_;
int compression_;
float compression_accumulator_;
bool capture_muted_;
bool check_volume_on_next_process_;
bool startup_;
scoped_ptr<DebugFile> file_preproc_;
scoped_ptr<DebugFile> file_postproc_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_
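
A minimal wiring sketch for the injection described above. The platform mixer calls are placeholders, and |gctrl| is assumed to come from the surrounding AudioProcessing setup:

#include "webrtc/modules/audio_processing/agc/agc_manager_direct.h"

// Hypothetical glue forwarding the callbacks to a platform mixer.
class PlatformVolume : public webrtc::VolumeCallbacks {
 public:
  PlatformVolume() : level_(128) {}
  virtual void SetMicVolume(int volume) { level_ = volume; }  // Set device.
  virtual int GetMicVolume() { return level_; }               // Query device.
 private:
  int level_;  // Stand-in for a real device volume.
};

// Assumed usage, per 10 ms capture frame:
//   PlatformVolume volume;
//   webrtc::AgcManagerDirect manager(gctrl, &volume);
//   manager.Initialize();
//   manager.AnalyzePreProcess(audio, num_channels, samples_per_channel);
//   ... run the capture-side processing, including GainControl ...
//   manager.Process(audio, length, sample_rate_hz);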

View File

@ -0,0 +1,162 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/agc/agc.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "webrtc/modules/audio_processing/agc/test/test_utils.h"
#include "webrtc/modules/interface/module_common_types.h"
#include "webrtc/test/testsupport/fileutils.h"
using ::testing::_;
using ::testing::AllOf;
using ::testing::AtLeast;
using ::testing::Eq;
using ::testing::Gt;
using ::testing::InSequence;
using ::testing::Lt;
using ::testing::Mock;
using ::testing::SaveArg;
namespace webrtc {
namespace {
// The tested values depend on this assumed gain.
const int kMaxGain = 80;
MATCHER_P(GtPointee, p, "") { return arg > *p; }
MATCHER_P(LtPointee, p, "") { return arg < *p; }
class AgcChecker {
public:
MOCK_METHOD2(LevelChanged, void(int iterations, int level));
};
class AgcTest : public ::testing::Test {
protected:
AgcTest()
: agc_(),
checker_(),
mic_level_(128) {
}
// A gain of <= -100 will zero out the signal.
void RunAgc(int iterations, float gain_db) {
FILE* input_file = fopen(
test::ResourcePath("voice_engine/audio_long16", "pcm").c_str(), "rb");
ASSERT_TRUE(input_file != NULL);
AudioFrame frame;
frame.sample_rate_hz_ = 16000;
frame.num_channels_ = 1;
frame.samples_per_channel_ = frame.sample_rate_hz_ / 100;
const size_t length = frame.samples_per_channel_ * frame.num_channels_;
float gain = Db2Linear(gain_db);
if (gain_db <= -100) {
gain = 0;
}
for (int i = 0; i < iterations; ++i) {
ASSERT_EQ(length, fread(frame.data_, sizeof(int16_t), length,
input_file));
SimulateMic(kMaxGain, mic_level_, &frame);
ApplyGainLinear(gain, &frame);
ASSERT_GE(agc_.Process(frame), 0);
int mic_level = agc_.MicLevel();
if (mic_level != mic_level_) {
printf("mic_level=%d\n", mic_level);
checker_.LevelChanged(i, mic_level);
}
mic_level_ = mic_level;
}
fclose(input_file);
}
Agc agc_;
AgcChecker checker_;
// Stores mic level between multiple runs of RunAgc in one test.
int mic_level_;
};
TEST_F(AgcTest, UpwardsChangeIsLimited) {
{
InSequence seq;
EXPECT_CALL(checker_, LevelChanged(Lt(500), Eq(179))).Times(1);
EXPECT_CALL(checker_, LevelChanged(_, Gt(179))).Times(AtLeast(1));
}
RunAgc(1000, -40);
}
TEST_F(AgcTest, DownwardsChangeIsLimited) {
{
InSequence seq;
EXPECT_CALL(checker_, LevelChanged(Lt(500), Eq(77))).Times(1);
EXPECT_CALL(checker_, LevelChanged(_, Lt(77))).Times(AtLeast(1));
}
RunAgc(1000, 40);
}
TEST_F(AgcTest, MovesUpToMaxAndDownToMin) {
int last_level = 128;
EXPECT_CALL(checker_, LevelChanged(_, GtPointee(&last_level)))
.Times(AtLeast(2))
.WillRepeatedly(SaveArg<1>(&last_level));
RunAgc(1000, -30);
EXPECT_EQ(255, last_level);
Mock::VerifyAndClearExpectations(&checker_);
EXPECT_CALL(checker_, LevelChanged(_, LtPointee(&last_level)))
.Times(AtLeast(2))
.WillRepeatedly(SaveArg<1>(&last_level));
RunAgc(1000, 50);
EXPECT_EQ(1, last_level);
}
TEST_F(AgcTest, HandlesZeroSignal) {
int last_level = 128;
// Doesn't respond to a zero signal.
EXPECT_CALL(checker_, LevelChanged(_, _)).Times(0);
RunAgc(1000, -100);
Mock::VerifyAndClearExpectations(&checker_);
// Reacts as usual afterwards.
EXPECT_CALL(checker_, LevelChanged(_, GtPointee(&last_level)))
.Times(AtLeast(2))
.WillRepeatedly(SaveArg<1>(&last_level));
RunAgc(500, -20);
}
TEST_F(AgcTest, ReachesSteadyState) {
int last_level = 128;
EXPECT_CALL(checker_, LevelChanged(_, _))
.Times(AtLeast(2))
.WillRepeatedly(SaveArg<1>(&last_level));
RunAgc(1000, -20);
Mock::VerifyAndClearExpectations(&checker_);
// If the level changes, it should be in a narrow band around the previous
// adaptation.
EXPECT_CALL(checker_, LevelChanged(_,
AllOf(Gt(last_level * 0.95), Lt(last_level * 1.05))))
.Times(AtLeast(0));
RunAgc(1000, -20);
}
// TODO(ajm): Add this test; requires measuring the signal RMS.
TEST_F(AgcTest, AdaptsToCorrectRMS) {
}
} // namespace
} // namespace webrtc

View File

@ -0,0 +1,136 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/agc/circular_buffer.h"
#include <assert.h>
#include <stdlib.h>
namespace webrtc {
AgcCircularBuffer::AgcCircularBuffer(int buffer_size)
: buffer_(new double[buffer_size]),
is_full_(false),
index_(0),
buffer_size_(buffer_size),
sum_(0) {}
AgcCircularBuffer::~AgcCircularBuffer() {}
void AgcCircularBuffer::Reset() {
is_full_ = false;
index_ = 0;
sum_ = 0;
}
AgcCircularBuffer* AgcCircularBuffer::Create(int buffer_size) {
if (buffer_size <= 0)
return NULL;
return new AgcCircularBuffer(buffer_size);
}
double AgcCircularBuffer::Oldest() const {
if (!is_full_)
return buffer_[0];
else
return buffer_[index_];
}
double AgcCircularBuffer::Mean() {
double m;
if (is_full_) {
m = sum_ / buffer_size_;
} else {
if (index_ > 0)
m = sum_ / index_;
else
m = 0;
}
return m;
}
void AgcCircularBuffer::Insert(double value) {
if (is_full_) {
sum_ -= buffer_[index_];
}
sum_ += value;
buffer_[index_] = value;
index_++;
if (index_ >= buffer_size_) {
is_full_ = true;
index_ = 0;
}
}
int AgcCircularBuffer::BufferLevel() {
if (is_full_)
return buffer_size_;
return index_;
}
int AgcCircularBuffer::Get(int index, double* value) const {
int err = ConvertToLinearIndex(&index);
if (err < 0)
return -1;
*value = buffer_[index];
return 0;
}
int AgcCircularBuffer::Set(int index, double value) {
int err = ConvertToLinearIndex(&index);
if (err < 0)
return -1;
sum_ -= buffer_[index];
buffer_[index] = value;
sum_ += value;
return 0;
}
int AgcCircularBuffer::ConvertToLinearIndex(int* index) const {
if (*index < 0 || *index >= buffer_size_)
return -1;
if (!is_full_ && *index >= index_)
return -1;
*index = index_ - 1 - *index;
if (*index < 0)
*index += buffer_size_;
return 0;
}
int AgcCircularBuffer::RemoveTransient(int width_threshold,
double val_threshold) {
if (!is_full_ && index_ < width_threshold + 2)
return 0;
int index_1 = 0;
int index_2 = width_threshold + 1;
double v = 0;
if (Get(index_1, &v) < 0)
return -1;
if (v < val_threshold) {
Set(index_1, 0);
int index;
for (index = index_2; index > index_1; index--) {
if (Get(index, &v) < 0)
return -1;
if (v < val_threshold)
break;
}
for (; index > index_1; index--) {
if (Set(index, 0.0) < 0)
return -1;
}
}
return 0;
}
} // namespace webrtc

View File

@ -0,0 +1,69 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_CIRCULAR_BUFFER_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_CIRCULAR_BUFFER_H_
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
namespace webrtc {
// A circular buffer tailored to the needs of this project. It stores the
// last K samples of the input and keeps track of the mean of those samples.
//
// It is used in class "PitchBasedVad" to keep track of posterior
// probabilities in the past few seconds. The posterior probabilities are used
// to recursively update prior probabilities.
class AgcCircularBuffer {
public:
static AgcCircularBuffer* Create(int buffer_size);
~AgcCircularBuffer();
// Whether the buffer has wrapped around, i.e. is full.
bool is_full() const { return is_full_; }
// Get the oldest entry in the buffer.
double Oldest() const;
// Insert new value into the buffer.
void Insert(double value);
// Reset buffer, forget the past, start fresh.
void Reset();
// The mean value of the elements in the buffer. The return value is zero if
// the buffer is empty, i.e. no value has been inserted yet.
double Mean();
// Remove transients. If the values exceed |val_threshold| for a period
// shorter than or equal to |width_threshold|, then that period is considered
// a transient and is set to zero.
int RemoveTransient(int width_threshold, double val_threshold);
private:
explicit AgcCircularBuffer(int buffer_size);
// Get previous values. |index = 0| corresponds to the most recent
// insertion. |index = 1| is the one before the most recent insertion, and
// so on.
int Get(int index, double* value) const;
// Set a given position to |value|. |index| is interpreted as above.
int Set(int index, double value);
// Return the number of valid elements in the buffer.
int BufferLevel();
// Convert an index with the same interpretation as in Get() above to the
// corresponding linear index into |buffer_|.
int ConvertToLinearIndex(int* index) const;
scoped_ptr<double[]> buffer_;
bool is_full_;
int index_;
int buffer_size_;
double sum_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_CIRCULAR_BUFFER_H_
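
A short usage sketch matching the semantics documented above (buffer size and thresholds are arbitrary example values):

#include "webrtc/modules/audio_processing/agc/circular_buffer.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"

void CircularBufferExample() {
  webrtc::scoped_ptr<webrtc::AgcCircularBuffer> buf(
      webrtc::AgcCircularBuffer::Create(100));
  if (buf.get() == NULL)
    return;  // Create() returns NULL for non-positive sizes.
  for (int n = 0; n < 150; ++n)
    buf->Insert(0.0);  // Insert past capacity; the buffer wraps around.
  buf->Insert(2.0);    // One sample above the threshold...
  buf->Insert(0.0);    // ...bounded by small values on both sides.
  // A run of at most |width_threshold| large values is zeroed as a transient.
  buf->RemoveTransient(7, 1.0);
  const double mean = buf->Mean();  // Back to 0 once the burst is removed.
  (void)mean;
}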

View File

@ -0,0 +1,132 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/agc/circular_buffer.h"
#include <stdio.h>
#include "testing/gtest/include/gtest/gtest.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
namespace webrtc {
static const int kWidthThreshold = 7;
static const double kValThreshold = 1.0;
static const int kLongBuffSize = 100;
static const int kShortBuffSize = 10;
static void InsertSequentially(int k, AgcCircularBuffer* circular_buffer) {
double mean_val;
for (int n = 1; n <= k; n++) {
EXPECT_FALSE(circular_buffer->is_full());
circular_buffer->Insert(n);
mean_val = circular_buffer->Mean();
EXPECT_EQ((n + 1.0) / 2., mean_val);
}
}
static void Insert(double value, int num_insertion,
AgcCircularBuffer* circular_buffer) {
for (int n = 0; n < num_insertion; n++)
circular_buffer->Insert(value);
}
static void InsertZeros(int num_zeros, AgcCircularBuffer* circular_buffer) {
Insert(0.0, num_zeros, circular_buffer);
}
TEST(AgcCircularBufferTest, GeneralTest) {
scoped_ptr<AgcCircularBuffer> circular_buffer(
AgcCircularBuffer::Create(kShortBuffSize));
double mean_val;
// Mean should return zero if nothing is inserted.
mean_val = circular_buffer->Mean();
EXPECT_DOUBLE_EQ(0.0, mean_val);
InsertSequentially(kShortBuffSize, circular_buffer.get());
// Should be full.
EXPECT_TRUE(circular_buffer->is_full());
// Correct update after being full.
for (int n = 1; n < kShortBuffSize; n++) {
circular_buffer->Insert(n);
mean_val = circular_buffer->Mean();
EXPECT_DOUBLE_EQ((kShortBuffSize + 1.) / 2., mean_val);
EXPECT_TRUE(circular_buffer->is_full());
}
// Check reset. This should be like starting fresh.
circular_buffer->Reset();
mean_val = circular_buffer->Mean();
EXPECT_DOUBLE_EQ(0, mean_val);
InsertSequentially(kShortBuffSize, circular_buffer.get());
EXPECT_TRUE(circular_buffer->is_full());
}
TEST(AgcCircularBufferTest, TransientsRemoval) {
scoped_ptr<AgcCircularBuffer> circular_buffer(
AgcCircularBuffer::Create(kLongBuffSize));
// Let the first transient be in wrap-around.
InsertZeros(kLongBuffSize - kWidthThreshold / 2, circular_buffer.get());
double push_val = kValThreshold;
double mean_val;
for (int k = kWidthThreshold; k >= 1; k--) {
Insert(push_val, k, circular_buffer.get());
circular_buffer->Insert(0);
mean_val = circular_buffer->Mean();
EXPECT_DOUBLE_EQ(k * push_val / kLongBuffSize, mean_val);
circular_buffer->RemoveTransient(kWidthThreshold, kValThreshold);
mean_val = circular_buffer->Mean();
EXPECT_DOUBLE_EQ(0, mean_val);
}
}
TEST(AgcCircularBufferTest, TransientDetection) {
scoped_ptr<AgcCircularBuffer> circular_buffer(
AgcCircularBuffer::Create(kLongBuffSize));
// Let the first transient be in wrap-around.
int num_insertion = kLongBuffSize - kWidthThreshold / 2;
InsertZeros(num_insertion, circular_buffer.get());
double push_val = 2;
// This is longer than a transient and shouldn't be removed.
int num_non_zero_elements = kWidthThreshold + 1;
Insert(push_val, num_non_zero_elements, circular_buffer.get());
double mean_val = circular_buffer->Mean();
EXPECT_DOUBLE_EQ(num_non_zero_elements * push_val / kLongBuffSize, mean_val);
circular_buffer->Insert(0);
EXPECT_EQ(0, circular_buffer->RemoveTransient(kWidthThreshold,
kValThreshold));
mean_val = circular_buffer->Mean();
EXPECT_DOUBLE_EQ(num_non_zero_elements * push_val / kLongBuffSize, mean_val);
// A transient right after a non-transient should be removed, and the mean
// should not change.
num_insertion = 3;
Insert(push_val, num_insertion, circular_buffer.get());
circular_buffer->Insert(0);
EXPECT_EQ(0, circular_buffer->RemoveTransient(kWidthThreshold,
kValThreshold));
mean_val = circular_buffer->Mean();
EXPECT_DOUBLE_EQ(num_non_zero_elements * push_val / kLongBuffSize, mean_val);
// The last input is larger than the threshold; although the sequence is
// short, it shouldn't be considered a transient.
Insert(push_val, num_insertion, circular_buffer.get());
num_non_zero_elements += num_insertion;
EXPECT_EQ(0, circular_buffer->RemoveTransient(kWidthThreshold,
kValThreshold));
mean_val = circular_buffer->Mean();
EXPECT_DOUBLE_EQ(num_non_zero_elements * push_val / kLongBuffSize, mean_val);
}
} // namespace webrtc

View File

@ -0,0 +1,27 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_COMMON_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_COMMON_H_
static const int kSampleRateHz = 16000;
static const int kLength10Ms = kSampleRateHz / 100;
static const int kMaxNumFrames = 4;
struct AudioFeatures {
double log_pitch_gain[kMaxNumFrames];
double pitch_lag_hz[kMaxNumFrames];
double spectral_peak[kMaxNumFrames];
double rms[kMaxNumFrames];
int num_frames;
bool silence;
};
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_COMMON_H_

View File

@ -0,0 +1,275 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_GAIN_MAP_INTERNAL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_GAIN_MAP_INTERNAL_H_
static const int kGainMapSize = 256;
// Uses parameters: si = 2, sf = 0.25, D = 8/256
static const int kGainMap[kGainMapSize] = {
-56,
-54,
-52,
-50,
-48,
-47,
-45,
-43,
-42,
-40,
-38,
-37,
-35,
-34,
-33,
-31,
-30,
-29,
-27,
-26,
-25,
-24,
-23,
-22,
-20,
-19,
-18,
-17,
-16,
-15,
-14,
-14,
-13,
-12,
-11,
-10,
-9,
-8,
-8,
-7,
-6,
-5,
-5,
-4,
-3,
-2,
-2,
-1,
0,
0,
1,
1,
2,
3,
3,
4,
4,
5,
5,
6,
6,
7,
7,
8,
8,
9,
9,
10,
10,
11,
11,
12,
12,
13,
13,
13,
14,
14,
15,
15,
15,
16,
16,
17,
17,
17,
18,
18,
18,
19,
19,
19,
20,
20,
21,
21,
21,
22,
22,
22,
23,
23,
23,
24,
24,
24,
24,
25,
25,
25,
26,
26,
26,
27,
27,
27,
28,
28,
28,
28,
29,
29,
29,
30,
30,
30,
30,
31,
31,
31,
32,
32,
32,
32,
33,
33,
33,
33,
34,
34,
34,
35,
35,
35,
35,
36,
36,
36,
36,
37,
37,
37,
38,
38,
38,
38,
39,
39,
39,
39,
40,
40,
40,
40,
41,
41,
41,
41,
42,
42,
42,
42,
43,
43,
43,
44,
44,
44,
44,
45,
45,
45,
45,
46,
46,
46,
46,
47,
47,
47,
47,
48,
48,
48,
48,
49,
49,
49,
49,
50,
50,
50,
50,
51,
51,
51,
51,
52,
52,
52,
52,
53,
53,
53,
53,
54,
54,
54,
54,
55,
55,
55,
55,
56,
56,
56,
56,
57,
57,
57,
57,
58,
58,
58,
58,
59,
59,
59,
59,
60,
60,
60,
60,
61,
61,
61,
61,
62,
62,
62,
62,
63,
63,
63,
63,
64
};
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_GAIN_MAP_INTERNAL_H_
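A hedged sketch of how a table like this is typically consumed (hypothetical; the actual indexing lives in the AGC manager sources elsewhere in this CL). The assumption here is that an analog microphone level in [0, 255] indexes the table to yield a gain in dB:
// Hypothetical helper; assumes kGainMap is indexed by a 0-255 mic level.
static int GainDbForMicLevel(int mic_level) {
  if (mic_level < 0)
    mic_level = 0;
  if (mic_level > kGainMapSize - 1)
    mic_level = kGainMapSize - 1;
  return kGainMap[mic_level];
}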

View File

@ -0,0 +1,61 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/agc/gmm.h"
#include <math.h>
#include <stdlib.h>
#include "webrtc/typedefs.h"
namespace webrtc {
static const int kMaxDimension = 10;
static void RemoveMean(const double* in, const double* mean_vec,
int dimension, double* out) {
for (int n = 0; n < dimension; ++n)
out[n] = in[n] - mean_vec[n];
}
static double ComputeExponent(const double* in, const double* covar_inv,
int dimension) {
double q = 0;
for (int i = 0; i < dimension; ++i) {
double v = 0;
for (int j = 0; j < dimension; j++)
v += (*covar_inv++) * in[j];
q += v * in[i];
}
q *= -0.5;
return q;
}
double EvaluateGmm(const double* x, const GmmParameters& gmm_parameters) {
if (gmm_parameters.dimension > kMaxDimension) {
return -1; // This is invalid pdf so the caller can check this.
}
double f = 0;
double v[kMaxDimension];
const double* mean_vec = gmm_parameters.mean;
const double* covar_inv = gmm_parameters.covar_inverse;
for (int n = 0; n < gmm_parameters.num_mixtures; n++) {
RemoveMean(x, mean_vec, gmm_parameters.dimension, v);
double q = ComputeExponent(v, covar_inv, gmm_parameters.dimension) +
gmm_parameters.weight[n];
f += exp(q);
mean_vec += gmm_parameters.dimension;
covar_inv += gmm_parameters.dimension * gmm_parameters.dimension;
}
return f;
}
} // namespace webrtc

View File

@ -0,0 +1,45 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_GMM_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_GMM_H_
namespace webrtc {
// A structure that specifies a GMM.
// A GMM is formulated as
// f(x) = w[0] * mixture[0] + w[1] * mixture[1] + ... +
// w[num_mixtures - 1] * mixture[num_mixtures - 1];
// where a 'mixture' is a Gaussian density.
struct GmmParameters {
// weight[n] = log(w[n]) - |dimension|/2 * log(2*pi) - 1/2 * log(det(cov[n]));
// where cov[n] is the covariance matrix of mixture n;
const double* weight;
// Pointer to the first element of a |num_mixtures| x |dimension| matrix
// where the kth row is the mean of the kth mixture.
const double* mean;
// Pointer to the first element of a |num_mixtures| x |dimension| x
// |dimension| 3D-matrix, where the kth 2D-matrix is the inverse of the
// covariance matrix of the kth mixture.
const double* covar_inverse;
// Dimensionality of the mixtures.
int dimension;
// Number of mixtures.
int num_mixtures;
};
// Evaluate the given GMM, specified by |gmm_parameters|, at the given point
// |x|. If the dimensionality of the GMM is larger than the maximum accepted
// by this function, -1 is returned.
double EvaluateGmm(const double* x, const GmmParameters& gmm_parameters);
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_GMM_H_
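A worked sketch of the conventions documented above (not part of the CL): a single standard-normal mixture in one dimension. With w = 1, unit covariance and dimension 1, the precomputed weight is log(1) - 1/2 * log(2*pi) - 1/2 * log(1), and evaluating at the mean should give roughly 1/sqrt(2*pi), i.e. about 0.3989:
#include <math.h>

#include "webrtc/modules/audio_processing/agc/gmm.h"

void ExampleStandardNormal() {
  const double kTwoPi = 6.283185307179586;
  const double weight[1] = {-0.5 * log(kTwoPi)};  // Log-domain weight.
  const double mean[1] = {0.0};
  const double covar_inverse[1] = {1.0};
  webrtc::GmmParameters gmm;
  gmm.weight = weight;
  gmm.mean = mean;
  gmm.covar_inverse = covar_inverse;
  gmm.dimension = 1;
  gmm.num_mixtures = 1;
  const double x[1] = {0.0};
  double pdf = webrtc::EvaluateGmm(x, gmm);  // ~0.3989 = 1/sqrt(2*pi).
  (void)pdf;
}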

View File

@ -0,0 +1,65 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/agc/gmm.h"
#include <math.h>
#include "testing/gtest/include/gtest/gtest.h"
#include "webrtc/modules/audio_processing/agc/noise_gmm_tables.h"
#include "webrtc/modules/audio_processing/agc/voice_gmm_tables.h"
namespace webrtc {
TEST(GmmTest, EvaluateGmm) {
GmmParameters noise_gmm;
GmmParameters voice_gmm;
// Setup noise GMM.
noise_gmm.dimension = kNoiseGmmDim;
noise_gmm.num_mixtures = kNoiseGmmNumMixtures;
noise_gmm.weight = kNoiseGmmWeights;
noise_gmm.mean = &kNoiseGmmMean[0][0];
noise_gmm.covar_inverse = &kNoiseGmmCovarInverse[0][0][0];
// Setup voice GMM.
voice_gmm.dimension = kVoiceGmmDim;
voice_gmm.num_mixtures = kVoiceGmmNumMixtures;
voice_gmm.weight = kVoiceGmmWeights;
voice_gmm.mean = &kVoiceGmmMean[0][0];
voice_gmm.covar_inverse = &kVoiceGmmCovarInverse[0][0][0];
// Test vectors. These are the mean of the GMM means.
const double kXVoice[kVoiceGmmDim] = {
-1.35893162459863, 602.862491970368, 178.022069191324};
const double kXNoise[kNoiseGmmDim] = {
-2.33443722724409, 2827.97828765184, 141.114178166812};
// Expected pdf values. These values are computed in MATLAB using EvalGmm.m
const double kPdfNoise = 1.88904409403101e-07;
const double kPdfVoice = 1.30453996982266e-06;
// The relative error should be smaller than the following value.
const double kAcceptedRelativeErr = 1e-10;
// Test Voice.
double pdf = EvaluateGmm(kXVoice, voice_gmm);
EXPECT_GT(pdf, 0);
double relative_error = fabs(pdf - kPdfVoice) / kPdfVoice;
EXPECT_LE(relative_error, kAcceptedRelativeErr);
// Test Noise.
pdf = EvaluateGmm(kXNoise, noise_gmm);
EXPECT_GT(pdf, 0);
relative_error = fabs(pdf - kPdfNoise) / kPdfNoise;
EXPECT_LE(relative_error, kAcceptedRelativeErr);
}
} // namespace webrtc

View File

@ -0,0 +1,228 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/agc/histogram.h"
#include <cmath>
#include <cstring>
#include "webrtc/modules/interface/module_common_types.h"
#include "webrtc/system_wrappers/interface/compile_assert.h"
namespace webrtc {
static const double kHistBinCenters[] = {
7.59621091765857e-02, 9.02036021061016e-02, 1.07115112009343e-01,
1.27197217770508e-01, 1.51044347572047e-01, 1.79362373905283e-01,
2.12989507320644e-01, 2.52921107370304e-01, 3.00339145144454e-01,
3.56647189489147e-01, 4.23511952494003e-01, 5.02912623991786e-01,
5.97199455365749e-01, 7.09163326739184e-01, 8.42118356728544e-01,
1.00000000000000e+00, 1.18748153630660e+00, 1.41011239906908e+00,
1.67448243801153e+00, 1.98841697800836e+00, 2.36120844786349e+00,
2.80389143520905e+00, 3.32956930911896e+00, 3.95380207843188e+00,
4.69506696634852e+00, 5.57530533426190e+00, 6.62057214370769e+00,
7.86180718043869e+00, 9.33575086877358e+00, 1.10860317842269e+01,
1.31644580546776e+01, 1.56325508754123e+01, 1.85633655299256e+01,
2.20436538184971e+01, 2.61764319021997e+01, 3.10840295702492e+01,
3.69117111886792e+01, 4.38319755100383e+01, 5.20496616180135e+01,
6.18080121423973e+01, 7.33958732149108e+01, 8.71562442838066e+01,
1.03496430860848e+02, 1.22900100720889e+02, 1.45941600416277e+02,
1.73302955873365e+02, 2.05794060286978e+02, 2.44376646872353e+02,
2.90192756065437e+02, 3.44598539797631e+02, 4.09204403447902e+02,
4.85922673669740e+02, 5.77024203055553e+02, 6.85205587130498e+02,
8.13668983291589e+02, 9.66216894324125e+02, 1.14736472207740e+03,
1.36247442287647e+03, 1.61791322085579e+03, 1.92124207711260e+03,
2.28143949334655e+03, 2.70916727454970e+03, 3.21708611729384e+03,
3.82023036499473e+03, 4.53645302286906e+03, 5.38695420497926e+03,
6.39690865534207e+03, 7.59621091765857e+03, 9.02036021061016e+03,
1.07115112009343e+04, 1.27197217770508e+04, 1.51044347572047e+04,
1.79362373905283e+04, 2.12989507320644e+04, 2.52921107370304e+04,
3.00339145144454e+04, 3.56647189489147e+04};
static const double kProbQDomain = 1024.0;
// Loudness of -15 dB (smallest expected loudness) in log domain,
// loudness_db = 13.5 * log10(rms);
static const double kLogDomainMinBinCenter = -2.57752062648587;
// Loudness step of 1 dB in log domain
static const double kLogDomainStepSizeInverse = 5.81954605750359;
static const int kTransientWidthThreshold = 7;
static const double kLowProbabilityThreshold = 0.2;
static const int kLowProbThresholdQ10 = static_cast<int>(
kLowProbabilityThreshold * kProbQDomain);
Histogram::Histogram()
: num_updates_(0),
audio_content_q10_(0),
bin_count_q10_(),
activity_probability_(),
hist_bin_index_(),
buffer_index_(0),
buffer_is_full_(false),
len_circular_buffer_(0),
len_high_activity_(0) {
COMPILE_ASSERT(kHistSize == sizeof(kHistBinCenters) /
sizeof(kHistBinCenters[0]), histogram_bin_centers_incorrect_size);
}
Histogram::Histogram(int window_size)
: num_updates_(0),
audio_content_q10_(0),
bin_count_q10_(),
activity_probability_(new int[window_size]),
hist_bin_index_(new int[window_size]),
buffer_index_(0),
buffer_is_full_(false),
len_circular_buffer_(window_size),
len_high_activity_(0) {}
Histogram::~Histogram() {}
void Histogram::Update(double rms, double activity_probability) {
// If circular histogram is activated then remove the oldest entry.
if (len_circular_buffer_ > 0)
RemoveOldestEntryAndUpdate();
// Find the corresponding bin.
int hist_index = GetBinIndex(rms);
// To Q10 domain.
int prob_q10 = static_cast<int16_t>(floor(activity_probability *
kProbQDomain));
InsertNewestEntryAndUpdate(prob_q10, hist_index);
}
// Does nothing if the buffer is not full yet.
void Histogram::RemoveOldestEntryAndUpdate() {
assert(len_circular_buffer_ > 0);
// Do nothing if circular buffer is not full.
if (!buffer_is_full_)
return;
int oldest_prob = activity_probability_[buffer_index_];
int oldest_hist_index = hist_bin_index_[buffer_index_];
UpdateHist(-oldest_prob, oldest_hist_index);
}
void Histogram::RemoveTransient() {
// We don't expect to be here if the high-activity region is longer than
// |kTransientWidthThreshold| or if there has not been any transient.
assert(len_high_activity_ <= kTransientWidthThreshold);
int index = (buffer_index_ > 0) ? (buffer_index_ - 1) :
len_circular_buffer_ - 1;
while (len_high_activity_ > 0) {
UpdateHist(-activity_probability_[index], hist_bin_index_[index]);
activity_probability_[index] = 0;
index = (index > 0) ? (index - 1) : (len_circular_buffer_ - 1);
len_high_activity_--;
}
}
void Histogram::InsertNewestEntryAndUpdate(int activity_prob_q10,
int hist_index) {
// Update the circular buffer if it is enabled.
if (len_circular_buffer_ > 0) {
// Removing transient.
if (activity_prob_q10 <= kLowProbThresholdQ10) {
// Lower than threshold probability, set it to zero.
activity_prob_q10 = 0;
// Check if this has been a transient.
if (len_high_activity_ <= kTransientWidthThreshold)
RemoveTransient(); // Remove this transient.
len_high_activity_ = 0;
} else if (len_high_activity_ <= kTransientWidthThreshold) {
len_high_activity_++;
}
// Updating the circular buffer.
activity_probability_[buffer_index_] = activity_prob_q10;
hist_bin_index_[buffer_index_] = hist_index;
// Increment the buffer index and check for wrap-around.
buffer_index_++;
if (buffer_index_ >= len_circular_buffer_) {
buffer_index_ = 0;
buffer_is_full_ = true;
}
}
num_updates_++;
// Saturate the counter instead of letting it wrap around to a negative
// value on overflow.
if (num_updates_ < 0)
num_updates_--;
UpdateHist(activity_prob_q10, hist_index);
}
void Histogram::UpdateHist(int activity_prob_q10, int hist_index) {
bin_count_q10_[hist_index] += activity_prob_q10;
audio_content_q10_ += activity_prob_q10;
}
double Histogram::AudioContent() const {
return audio_content_q10_ / kProbQDomain;
}
Histogram* Histogram::Create() {
return new Histogram;
}
Histogram* Histogram::Create(int window_size) {
if (window_size < 0)
return NULL;
return new Histogram(window_size);
}
void Histogram::Reset() {
// Reset the histogram, audio-content and number of updates.
memset(bin_count_q10_, 0, sizeof(bin_count_q10_));
audio_content_q10_ = 0;
num_updates_ = 0;
// Empty the circular buffer.
buffer_index_ = 0;
buffer_is_full_ = false;
len_high_activity_ = 0;
}
int Histogram::GetBinIndex(double rms) {
// First exclude overload cases.
if (rms <= kHistBinCenters[0]) {
return 0;
} else if (rms >= kHistBinCenters[kHistSize - 1]) {
return kHistSize - 1;
} else {
// The quantizer is uniform in log domain. Alternatively we could do binary
// search in linear domain.
double rms_log = log(rms);
int index = static_cast<int>(floor((rms_log - kLogDomainMinBinCenter) *
kLogDomainStepSizeInverse));
// The final decision is in linear domain.
double b = 0.5 * (kHistBinCenters[index] + kHistBinCenters[index + 1]);
if (rms > b) {
return index + 1;
}
return index;
}
}
double Histogram::CurrentRms() const {
double p;
double mean_val = 0;
if (audio_content_q10_ > 0) {
double p_total_inverse = 1. / static_cast<double>(audio_content_q10_);
for (int n = 0; n < kHistSize; n++) {
p = static_cast<double>(bin_count_q10_[n]) * p_total_inverse;
mean_val += p * kHistBinCenters[n];
}
} else {
mean_val = kHistBinCenters[0];
}
return mean_val;
}
} // namespace webrtc

View File

@ -0,0 +1,91 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_HISTOGRAM_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_HISTOGRAM_H_
#include <string.h>
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// This class implements a histogram of loudness with circular buffers so that
// the histogram tracks the last T seconds of loudness.
class Histogram {
public:
// Create a non-sliding Histogram.
static Histogram* Create();
// Create a sliding Histogram, i.e. the histogram represents the last
// |window_size| samples.
static Histogram* Create(int window_size);
~Histogram();
// Insert RMS and the corresponding activity probability.
void Update(double rms, double activity_probability);
// Reset the histogram, forget the past.
void Reset();
// Current loudness, which is actually the mean of histogram in loudness
// domain.
double CurrentRms() const;
// Sum of the histogram content.
double AudioContent() const;
// Number of times the histogram has been updated.
int num_updates() const { return num_updates_; }
private:
Histogram();
explicit Histogram(int window);
// Find the histogram bin associated with the given |rms|.
int GetBinIndex(double rms);
void RemoveOldestEntryAndUpdate();
void InsertNewestEntryAndUpdate(int activity_prob_q10, int hist_index);
void UpdateHist(int activity_prob_q10, int hist_index);
void RemoveTransient();
// Number of histogram bins.
static const int kHistSize = 77;
// Number of times the histogram has been updated.
int num_updates_;
// Audio content; this should be equal to the sum of the components of
// |bin_count_q10_|.
int64_t audio_content_q10_;
// Histogram of input RMS in Q10 with |kHistSize| bins. In each Update() we
// increment the associated histogram bin by the given probability. The
// increment is implemented in Q10 to avoid rounding errors.
int64_t bin_count_q10_[kHistSize];
// Circular buffer of probabilities.
scoped_ptr<int[]> activity_probability_;
// Circular buffer of histogram-indices of probabilities.
scoped_ptr<int[]> hist_bin_index_;
// Current index of the circular buffer, where the newest data is written to,
// hence pointing to the oldest data when the buffer is full.
int buffer_index_;
// Whether the buffer is full, i.e. we have had a wrap-around.
bool buffer_is_full_;
// Size of the circular buffer.
int len_circular_buffer_;
// Length of the current run of high-activity entries.
int len_high_activity_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_HISTOGRAM_H_
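A minimal usage sketch (hypothetical, for illustration): a histogram sliding over the last 100 updates, fed with RMS values and the corresponding activity probabilities:
#include "webrtc/modules/audio_processing/agc/histogram.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"

void ExampleHistogram() {
  webrtc::scoped_ptr<webrtc::Histogram> hist(webrtc::Histogram::Create(100));
  // Each update adds the activity probability to the bin matching the RMS;
  // once the window is full, the oldest entry is removed first.
  hist->Update(1000.0, 0.9);
  hist->Update(1100.0, 0.8);
  double rms = hist->CurrentRms();        // Probability-weighted mean RMS.
  double content = hist->AudioContent();  // Accumulated probability mass.
  (void)rms;
  (void)content;
}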

View File

@ -0,0 +1,104 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Use CreateHistUnittestFile.m to generate the input file.
#include "webrtc/modules/audio_processing/agc/histogram.h"
#include <stdio.h>
#include <cmath>
#include "gtest/gtest.h"
#include "webrtc/test/testsupport/fileutils.h"
#include "webrtc/modules/audio_processing/agc/utility.h"
namespace webrtc {
struct InputOutput {
double rms;
double activity_probability;
double audio_content;
double loudness;
};
const double kRelativeErrTol = 1e-10;
class HistogramTest : public ::testing::Test {
protected:
void RunTest(bool enable_circular_buff,
const char* filename);
private:
void TestClean();
scoped_ptr<Histogram> hist_;
};
void HistogramTest::TestClean() {
EXPECT_EQ(hist_->CurrentRms(), 7.59621091765857e-02);
EXPECT_EQ(hist_->AudioContent(), 0);
EXPECT_EQ(hist_->num_updates(), 0);
}
void HistogramTest::RunTest(bool enable_circular_buff, const char* filename) {
FILE* in_file = fopen(filename, "rb");
ASSERT_TRUE(in_file != NULL);
if (enable_circular_buff) {
int buffer_size;
EXPECT_EQ(fread(&buffer_size, sizeof(buffer_size), 1, in_file), 1u);
hist_.reset(Histogram::Create(buffer_size));
} else {
hist_.reset(Histogram::Create());
}
TestClean();
InputOutput io;
int num_updates = 0;
int num_reset = 0;
while (fread(&io, sizeof(InputOutput), 1, in_file) == 1) {
if (io.rms < 0) {
// We have to reset.
hist_->Reset();
TestClean();
num_updates = 0;
num_reset++;
// Read the next chunk of input.
if (fread(&io, sizeof(InputOutput), 1, in_file) != 1)
break;
}
hist_->Update(io.rms, io.activity_probability);
num_updates++;
EXPECT_EQ(hist_->num_updates(), num_updates);
double audio_content = hist_->AudioContent();
double abs_err = std::min(audio_content, io.audio_content) *
kRelativeErrTol;
ASSERT_NEAR(audio_content, io.audio_content, abs_err);
double current_loudness = Linear2Loudness(hist_->CurrentRms());
abs_err = std::min(fabs(current_loudness), fabs(io.loudness)) *
kRelativeErrTol;
ASSERT_NEAR(current_loudness, io.loudness, abs_err);
}
fclose(in_file);
}
TEST_F(HistogramTest, ActiveCircularBuffer) {
RunTest(true,
test::ResourcePath("audio_processing/agc/agc_with_circular_buffer",
"dat").c_str());
}
TEST_F(HistogramTest, InactiveCircularBuffer) {
RunTest(false,
test::ResourcePath("audio_processing/agc/agc_no_circular_buffer",
"dat").c_str());
}
} // namespace webrtc

View File

@ -0,0 +1,36 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MOCK_AGC_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MOCK_AGC_H_
#include "webrtc/modules/audio_processing/agc/agc.h"
#include "gmock/gmock.h"
#include "webrtc/modules/interface/module_common_types.h"
namespace webrtc {
class MockAgc : public Agc {
public:
MOCK_METHOD2(AnalyzePreproc, float(const int16_t* audio, int length));
MOCK_METHOD3(Process, int(const int16_t* audio, int length,
int sample_rate_hz));
MOCK_METHOD1(GetRmsErrorDb, bool(int* error));
MOCK_METHOD0(Reset, void());
MOCK_METHOD1(set_target_level_dbfs, int(int level));
MOCK_CONST_METHOD0(target_level_dbfs, int());
MOCK_METHOD1(EnableStandaloneVad, void(bool enable));
MOCK_CONST_METHOD0(standalone_vad_enabled, bool());
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MOCK_AGC_H_

View File

@ -0,0 +1,77 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// GMM tables for inactive segments. Generated by MakeGmmTables.m.
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_NOISE_GMM_TABLES_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_NOISE_GMM_TABLES_H_
static const int kNoiseGmmNumMixtures = 12;
static const int kNoiseGmmDim = 3;
static const double kNoiseGmmCovarInverse[kNoiseGmmNumMixtures]
[kNoiseGmmDim][kNoiseGmmDim] = {
{{ 7.36219567592941e+00, 4.83060785179861e-03, 1.23335151497610e-02},
{ 4.83060785179861e-03, 1.65289507047817e-04, -2.41490588169997e-04},
{ 1.23335151497610e-02, -2.41490588169997e-04, 6.59472060689382e-03}},
{{ 8.70265239309140e+00, -5.30636201431086e-04, 5.44014966585347e-03},
{-5.30636201431086e-04, 3.11095453521008e-04, -1.86287206836035e-04},
{ 5.44014966585347e-03, -1.86287206836035e-04, 6.29493388790744e-04}},
{{ 4.53467851955055e+00, -3.92977536695197e-03, -2.46521420693317e-03},
{-3.92977536695197e-03, 4.94650752632750e-05, -1.08587438501826e-05},
{-2.46521420693317e-03, -1.08587438501826e-05, 9.28793975422261e-05}},
{{ 9.26817997114275e-01, -4.03976069276753e-04, -3.56441427392165e-03},
{-4.03976069276753e-04, 2.51976251631430e-06, 1.46914206734572e-07},
{-3.56441427392165e-03, 1.46914206734572e-07, 8.19914567685373e-05}},
{{ 7.61715986787441e+00, -1.54889041216888e-04, 2.41756280071656e-02},
{-1.54889041216888e-04, 3.50282550461672e-07, -6.27251196972490e-06},
{ 2.41756280071656e-02, -6.27251196972490e-06, 1.45061847649872e-02}},
{{ 8.31193642663158e+00, -3.84070508164323e-04, -3.09750630821876e-02},
{-3.84070508164323e-04, 3.80433432277336e-07, -1.14321142836636e-06},
{-3.09750630821876e-02, -1.14321142836636e-06, 8.35091486289997e-04}},
{{ 9.67283151270894e-01, 5.82465812445039e-05, -3.18350798617053e-03},
{ 5.82465812445039e-05, 2.23762672000318e-07, -7.74196587408623e-07},
{-3.18350798617053e-03, -7.74196587408623e-07, 3.85120938338325e-04}},
{{ 8.28066236985388e+00, 5.87634508319763e-05, 6.99303090891743e-03},
{ 5.87634508319763e-05, 2.93746018618058e-07, 3.40843332882272e-07},
{ 6.99303090891743e-03, 3.40843332882272e-07, 1.99379171190344e-04}},
{{ 6.07488998675646e+00, -1.11494526618473e-02, 5.10013111123381e-03},
{-1.11494526618473e-02, 6.99238879921751e-04, 5.36718550370870e-05},
{ 5.10013111123381e-03, 5.36718550370870e-05, 5.26909853276753e-04}},
{{ 6.90492021419175e+00, 4.20639355257863e-04, -2.38612752336481e-03},
{ 4.20639355257863e-04, 3.31246767338153e-06, -2.42052288150859e-08},
{-2.38612752336481e-03, -2.42052288150859e-08, 4.46608368363412e-04}},
{{ 1.31069150869715e+01, -1.73718583865670e-04, -1.97591814508578e-02},
{-1.73718583865670e-04, 2.80451716300124e-07, 9.96570755379865e-07},
{-1.97591814508578e-02, 9.96570755379865e-07, 2.41361900868847e-03}},
{{ 4.69566344239814e+00, -2.61077567563690e-04, 5.26359000761433e-03},
{-2.61077567563690e-04, 1.82420859823767e-06, -7.83645887541601e-07},
{ 5.26359000761433e-03, -7.83645887541601e-07, 1.33586288288802e-02}}};
static const double kNoiseGmmMean[kNoiseGmmNumMixtures][kNoiseGmmDim] = {
{-2.01386094766163e+00, 1.69702162045397e+02, 7.41715804872181e+01},
{-1.94684591777290e+00, 1.42398396732668e+02, 1.64186321157831e+02},
{-2.29319297562437e+00, 3.86415425589868e+02, 2.13452215267125e+02},
{-3.25487177070268e+00, 1.08668712553616e+03, 2.33119949467419e+02},
{-2.13159632447467e+00, 4.83821702557717e+03, 6.86786166673740e+01},
{-2.26171410780526e+00, 4.79420193982422e+03, 1.53222513286450e+02},
{-3.32166740703185e+00, 4.35161135834358e+03, 1.33206448431316e+02},
{-2.19290322814343e+00, 3.98325506609408e+03, 2.13249167359934e+02},
{-2.02898459255404e+00, 7.37039893155007e+03, 1.12518527491926e+02},
{-2.26150236399500e+00, 1.54896745196145e+03, 1.49717357868579e+02},
{-2.00417668301790e+00, 3.82434760310304e+03, 1.07438913004312e+02},
{-2.30193040814533e+00, 1.43953696546439e+03, 7.04085275122649e+01}};
static const double kNoiseGmmWeights[kNoiseGmmNumMixtures] = {
-1.09422832086193e+01, -1.10847897513425e+01, -1.36767587732187e+01,
-1.79789356118641e+01, -1.42830169160894e+01, -1.56500228061379e+01,
-1.83124990950113e+01, -1.69979436177477e+01, -1.12329424387828e+01,
-1.41311785780639e+01, -1.47171861448585e+01, -1.35963362781839e+01};
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_NOISE_GMM_TABLES_H_

View File

@ -0,0 +1,123 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/agc/pitch_based_vad.h"
#include <assert.h>
#include <math.h>
#include <string.h>
#include "webrtc/modules/audio_processing/agc/circular_buffer.h"
#include "webrtc/modules/audio_processing/agc/common.h"
#include "webrtc/modules/audio_processing/agc/noise_gmm_tables.h"
#include "webrtc/modules/audio_processing/agc/voice_gmm_tables.h"
#include "webrtc/modules/interface/module_common_types.h"
#include "webrtc/system_wrappers/interface/compile_assert.h"
namespace webrtc {
COMPILE_ASSERT(kNoiseGmmDim == kVoiceGmmDim,
noise_and_voice_gmm_dimension_not_equal);
// These values should match MATLAB counterparts for unit-tests to pass.
static const int kPosteriorHistorySize = 500; // 5 sec of 10 ms frames.
static const double kInitialPriorProbability = 0.3;
static const int kTransientWidthThreshold = 7;
static const double kLowProbabilityThreshold = 0.2;
static double LimitProbability(double p) {
const double kLimHigh = 0.99;
const double kLimLow = 0.01;
if (p > kLimHigh)
p = kLimHigh;
else if (p < kLimLow)
p = kLimLow;
return p;
}
PitchBasedVad::PitchBasedVad()
: p_prior_(kInitialPriorProbability),
circular_buffer_(AgcCircularBuffer::Create(kPosteriorHistorySize)) {
// Setup noise GMM.
noise_gmm_.dimension = kNoiseGmmDim;
noise_gmm_.num_mixtures = kNoiseGmmNumMixtures;
noise_gmm_.weight = kNoiseGmmWeights;
noise_gmm_.mean = &kNoiseGmmMean[0][0];
noise_gmm_.covar_inverse = &kNoiseGmmCovarInverse[0][0][0];
// Setup voice GMM.
voice_gmm_.dimension = kVoiceGmmDim;
voice_gmm_.num_mixtures = kVoiceGmmNumMixtures;
voice_gmm_.weight = kVoiceGmmWeights;
voice_gmm_.mean = &kVoiceGmmMean[0][0];
voice_gmm_.covar_inverse = &kVoiceGmmCovarInverse[0][0][0];
}
PitchBasedVad::~PitchBasedVad() {}
int PitchBasedVad::VoicingProbability(const AudioFeatures& features,
double* p_combined) {
double p;
double gmm_features[3];
double pdf_features_given_voice;
double pdf_features_given_noise;
// These limits are the same as in the MATLAB implementation 'VoicingProbGMM()'.
const double kLimLowLogPitchGain = -2.0;
const double kLimHighLogPitchGain = -0.9;
const double kLimLowSpectralPeak = 200;
const double kLimHighSpectralPeak = 2000;
const double kEps = 1e-12;
for (int n = 0; n < features.num_frames; n++) {
gmm_features[0] = features.log_pitch_gain[n];
gmm_features[1] = features.spectral_peak[n];
gmm_features[2] = features.pitch_lag_hz[n];
pdf_features_given_voice = EvaluateGmm(gmm_features, voice_gmm_);
pdf_features_given_noise = EvaluateGmm(gmm_features, noise_gmm_);
if (features.spectral_peak[n] < kLimLowSpectralPeak ||
features.spectral_peak[n] > kLimHighSpectralPeak ||
features.log_pitch_gain[n] < kLimLowLogPitchGain) {
pdf_features_given_voice = kEps * pdf_features_given_noise;
} else if (features.log_pitch_gain[n] > kLimHighLogPitchGain) {
pdf_features_given_noise = kEps * pdf_features_given_voice;
}
p = p_prior_ * pdf_features_given_voice / (pdf_features_given_voice *
p_prior_ + pdf_features_given_noise * (1 - p_prior_));
p = LimitProbability(p);
// Combine pitch-based probability with standalone probability, before
// updating prior probabilities.
double prod_active = p * p_combined[n];
double prod_inactive = (1 - p) * (1 - p_combined[n]);
p_combined[n] = prod_active / (prod_active + prod_inactive);
if (UpdatePrior(p_combined[n]) < 0)
return -1;
// Limit prior probability. With a zero prior probability the posterior
// probability is always zero.
p_prior_ = LimitProbability(p_prior_);
}
return 0;
}
int PitchBasedVad::UpdatePrior(double p) {
circular_buffer_->Insert(p);
if (circular_buffer_->RemoveTransient(kTransientWidthThreshold,
kLowProbabilityThreshold) < 0)
return -1;
p_prior_ = circular_buffer_->Mean();
return 0;
}
} // namespace webrtc
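A worked instance of the combination step in VoicingProbability() above (the numbers are illustrative only): with a pitch-based probability p = 0.8 and the neutral standalone value p_combined = 0.5, prod_active = 0.8 * 0.5 = 0.40 and prod_inactive = 0.2 * 0.5 = 0.10, so the combined probability is 0.40 / (0.40 + 0.10) = 0.8. A neutral 0.5 therefore leaves the pitch-based probability unchanged, which is exactly why the stand-alone VAD reports 0.5 for frames it classifies as active.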

View File

@ -0,0 +1,56 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_PITCH_BASED_VAD_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_PITCH_BASED_VAD_H_
#include "webrtc/modules/audio_processing/agc/common.h"
#include "webrtc/modules/audio_processing/agc/gmm.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
#include "webrtc/typedefs.h"
namespace webrtc {
class AudioFrame;
class AgcCircularBuffer;
// Computes the probability that the input audio frame is active, given the
// corresponding pitch gain and lag of the frame.
class PitchBasedVad {
public:
PitchBasedVad();
~PitchBasedVad();
// Compute pitch-based voicing probability, given the features.
// features: a structure containing features required for computing voicing
// probabilities.
//
// p_combined: an array which contains the combined activity probabilities
// computed prior to the call of this function. The method then
// computes the voicing probabilities, combines them with the
// given values, and returns the results in |p_combined|.
int VoicingProbability(const AudioFeatures& features, double* p_combined);
private:
int UpdatePrior(double p);
// TODO(turajs): maybe define this at a higher level (maybe as an enum) so
// that all the code recognizes it as "no-error."
static const int kNoError = 0;
GmmParameters noise_gmm_;
GmmParameters voice_gmm_;
double p_prior_;
scoped_ptr<AgcCircularBuffer> circular_buffer_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_PITCH_BASED_VAD_H_

View File

@ -0,0 +1,71 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/agc/pitch_based_vad.h"
#include <math.h>
#include <stdio.h>
#include <string.h>
#include "gtest/gtest.h"
#include "webrtc/test/testsupport/fileutils.h"
namespace webrtc {
TEST(PitchBasedVadTest, VoicingProbabilityTest) {
std::string spectral_peak_file_name = test::ResourcePath(
"audio_processing/agc/agc_spectral_peak", "dat");
FILE* spectral_peak_file = fopen(spectral_peak_file_name.c_str(), "rb");
ASSERT_TRUE(spectral_peak_file != NULL);
std::string pitch_gain_file_name =
test::ResourcePath("audio_processing/agc/agc_pitch_gain", "dat");
FILE* pitch_gain_file = fopen(pitch_gain_file_name.c_str(), "rb");
ASSERT_TRUE(pitch_gain_file != NULL);
std::string pitch_lag_file_name =
test::ResourcePath("audio_processing/agc/agc_pitch_lag", "dat");
FILE* pitch_lag_file = fopen(pitch_lag_file_name.c_str(), "rb");
ASSERT_TRUE(pitch_lag_file != NULL);
std::string voicing_prob_file_name =
test::ResourcePath("audio_processing/agc/agc_voicing_prob", "dat");
FILE* voicing_prob_file = fopen(voicing_prob_file_name.c_str(), "rb");
ASSERT_TRUE(voicing_prob_file != NULL);
PitchBasedVad vad_;
double reference_activity_probability;
AudioFeatures audio_features;
memset(&audio_features, 0, sizeof(audio_features));
audio_features.num_frames = 1;
while (fread(audio_features.spectral_peak,
sizeof(audio_features.spectral_peak[0]), 1,
spectral_peak_file) == 1u) {
double p;
ASSERT_EQ(1u, fread(audio_features.log_pitch_gain, sizeof(
audio_features.log_pitch_gain[0]), 1, pitch_gain_file));
ASSERT_EQ(1u, fread(audio_features.pitch_lag_hz, sizeof(
audio_features.pitch_lag_hz[0]), 1, pitch_lag_file));
ASSERT_EQ(1u, fread(&reference_activity_probability, sizeof(
reference_activity_probability), 1, voicing_prob_file));
p = 0.5; // Initialize to the neutral value for combining probabilities.
EXPECT_EQ(0, vad_.VoicingProbability(audio_features, &p));
EXPECT_NEAR(p, reference_activity_probability, 0.01);
}
fclose(spectral_peak_file);
fclose(pitch_gain_file);
fclose(pitch_lag_file);
fclose(voicing_prob_file);
}
} // namespace webrtc

View File

@ -0,0 +1,52 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/agc/pitch_internal.h"
#include <cmath>
// A 4-to-3 linear interpolation.
// The interpolation constants are derived as follows:
// Input pitch parameters are updated every 7.5 ms. Within a 30-ms interval
// we are interested in the pitch parameters of 0-5 ms, 10-15 ms and 20-25 ms.
// This is like interpolating 4-to-6 and keeping the odd samples.
// The reason behind this is that LPC coefficients are computed for the first
// half of each 10-ms interval.
static void PitchInterpolation(double old_val, const double* in, double* out) {
out[0] = 1. / 6. * old_val + 5. / 6. * in[0];
out[1] = 5. / 6. * in[1] + 1. / 6. * in[2];
out[2] = 0.5 * in[2] + 0.5 * in[3];
}
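// One way to recover the constants above, under the assumption that each
// parameter is associated with the center of its interval: the inputs are
// centered at 3.75 + 7.5k ms (k = 0..3), the previous frame's value at
// -3.75 ms, and the outputs are wanted at 2.5, 12.5 and 22.5 ms. Linear
// interpolation then gives, e.g. for out[0],
//   (2.5 - (-3.75)) / 7.5 = 5/6  =>  out[0] = 1/6 * old_val + 5/6 * in[0],
// and similarly (12.5 - 11.25) / 7.5 = 1/6 for out[1] and
// (22.5 - 18.75) / 7.5 = 1/2 for out[2].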
void GetSubframesPitchParameters(int sampling_rate_hz,
double* gains,
double* lags,
int num_in_frames,
int num_out_frames,
double* log_old_gain,
double* old_lag,
double* log_pitch_gain,
double* pitch_lag_hz) {
// Gain interpolation is in log-domain, also returned in log-domain.
for (int n = 0; n < num_in_frames; n++)
gains[n] = log(gains[n] + 1e-12);
// Interpolate lags and gains.
PitchInterpolation(*log_old_gain, gains, log_pitch_gain);
*log_old_gain = gains[num_in_frames - 1];
PitchInterpolation(*old_lag, lags, pitch_lag_hz);
*old_lag = lags[num_in_frames - 1];
// Convert pitch-lags to Hertz.
for (int n = 0; n < num_out_frames; n++) {
pitch_lag_hz[n] = sampling_rate_hz / pitch_lag_hz[n];
}
}

View File

@ -0,0 +1,26 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_PITCH_INTERNAL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_PITCH_INTERNAL_H_
// TODO(turajs): Write a description of this function. Also be consistent with
// usage of |sampling_rate_hz| vs |kSamplingFreqHz|.
void GetSubframesPitchParameters(int sampling_rate_hz,
double* gains,
double* lags,
int num_in_frames,
int num_out_frames,
double* log_old_gain,
double* old_lag,
double* log_pitch_gain,
double* pitch_lag_hz);
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_PITCH_INTERNAL_H_

View File

@ -0,0 +1,50 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/agc/pitch_internal.h"
#include <math.h>
#include "gtest/gtest.h"
TEST(PitchInternalTest, test) {
const int kSamplingRateHz = 8000;
const int kNumInputParameters = 4;
const int kNumOutputParameters = 3;
// Inputs
double log_old_gain = log(0.5);
double gains[] = {0.6, 0.2, 0.5, 0.4};
double old_lag = 70;
double lags[] = {90, 111, 122, 50};
// Expected outputs
double expected_log_pitch_gain[] = {-0.541212549898316, -1.45672279045507,
-0.80471895621705};
double expected_log_old_gain = log(gains[kNumInputParameters - 1]);
double expected_pitch_lag_hz[] = {92.3076923076923, 70.9010339734121,
93.0232558139535};
double expected_old_lag = lags[kNumInputParameters - 1];
double log_pitch_gain[kNumOutputParameters];
double pitch_lag_hz[kNumInputParameters];
GetSubframesPitchParameters(kSamplingRateHz, gains, lags, kNumInputParameters,
kNumOutputParameters, &log_old_gain, &old_lag,
log_pitch_gain, pitch_lag_hz);
for (int n = 0; n < 3; n++) {
EXPECT_NEAR(pitch_lag_hz[n], expected_pitch_lag_hz[n], 1e-6);
EXPECT_NEAR(log_pitch_gain[n], expected_log_pitch_gain[n], 1e-8);
}
EXPECT_NEAR(old_lag, expected_old_lag, 1e-6);
EXPECT_NEAR(log_old_gain, expected_log_old_gain, 1e-8);
}

View File

@ -0,0 +1,111 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/agc/pole_zero_filter.h"
#include <stdlib.h>
#include <string.h>
#include <algorithm>
namespace webrtc {
PoleZeroFilter* PoleZeroFilter::Create(const float* numerator_coefficients,
int order_numerator,
const float* denominator_coefficients,
int order_denominator) {
// Check the pointers before dereferencing |denominator_coefficients[0]|;
// || short-circuits, so the NULL tests must come first.
if (order_numerator < 0 ||
order_denominator < 0 ||
order_numerator > kMaxFilterOrder ||
order_denominator > kMaxFilterOrder ||
numerator_coefficients == NULL ||
denominator_coefficients == NULL ||
denominator_coefficients[0] == 0)
return NULL;
return new PoleZeroFilter(numerator_coefficients, order_numerator,
denominator_coefficients, order_denominator);
}
PoleZeroFilter::PoleZeroFilter(const float* numerator_coefficients,
int order_numerator,
const float* denominator_coefficients,
int order_denominator)
: past_input_(),
past_output_(),
numerator_coefficients_(),
denominator_coefficients_(),
order_numerator_(order_numerator),
order_denominator_(order_denominator),
highest_order_(std::max(order_denominator, order_numerator)) {
memcpy(numerator_coefficients_, numerator_coefficients,
sizeof(numerator_coefficients_[0]) * (order_numerator_ + 1));
memcpy(denominator_coefficients_, denominator_coefficients,
sizeof(denominator_coefficients_[0]) * (order_denominator_ + 1));
if (denominator_coefficients_[0] != 1) {
for (int n = 0; n <= order_numerator_; n++)
numerator_coefficients_[n] /= denominator_coefficients_[0];
for (int n = 0; n <= order_denominator_; n++)
denominator_coefficients_[n] /= denominator_coefficients_[0];
}
}
template <typename T>
static float FilterArPast(const T* past, int order,
const float* coefficients) {
float sum = 0.0f;
int past_index = order - 1;
for (int k = 1; k <= order; k++, past_index--)
sum += coefficients[k] * past[past_index];
return sum;
}
int PoleZeroFilter::Filter(const int16_t* in,
int num_input_samples,
float* output) {
if (in == NULL || num_input_samples < 0 || output == NULL)
return -1;
// The first min(num_input_samples, highest_order_) output samples depend on
// the past input and output stored from the previous call.
const int k = std::min(num_input_samples, highest_order_);
int n;
for (n = 0; n < k; n++) {
output[n] = in[n] * numerator_coefficients_[0];
output[n] += FilterArPast(&past_input_[n], order_numerator_,
numerator_coefficients_);
output[n] -= FilterArPast(&past_output_[n], order_denominator_,
denominator_coefficients_);
past_input_[n + order_numerator_] = in[n];
past_output_[n + order_denominator_] = output[n];
}
if (highest_order_ < num_input_samples) {
for (int m = 0; n < num_input_samples; n++, m++) {
output[n] = in[n] * numerator_coefficients_[0];
output[n] += FilterArPast(&in[m], order_numerator_,
numerator_coefficients_);
output[n] -= FilterArPast(&output[m], order_denominator_,
denominator_coefficients_);
}
// Record into the past signal.
memcpy(past_input_, &in[num_input_samples - order_numerator_],
sizeof(in[0]) * order_numerator_);
memcpy(past_output_, &output[num_input_samples - order_denominator_],
sizeof(output[0]) * order_denominator_);
} else {
// Odd case where the input is not longer than the filter order.
memmove(past_input_, &past_input_[num_input_samples], order_numerator_ *
sizeof(past_input_[0]));
memmove(past_output_, &past_output_[num_input_samples], order_denominator_ *
sizeof(past_output_[0]));
}
return 0;
}
} // namespace webrtc

View File

@ -0,0 +1,50 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_POLE_ZERO_FILTER_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_POLE_ZERO_FILTER_H_
#include "webrtc/typedefs.h"
namespace webrtc {
class PoleZeroFilter {
public:
~PoleZeroFilter() {}
static PoleZeroFilter* Create(const float* numerator_coefficients,
int order_numerator,
const float* denominator_coefficients,
int order_denominator);
int Filter(const int16_t* in, int num_input_samples, float* output);
private:
PoleZeroFilter(const float* numerator_coefficients,
int order_numerator,
const float* denominator_coefficients,
int order_denominator);
static const int kMaxFilterOrder = 24;
int16_t past_input_[kMaxFilterOrder * 2];
float past_output_[kMaxFilterOrder * 2];
float numerator_coefficients_[kMaxFilterOrder + 1];
float denominator_coefficients_[kMaxFilterOrder + 1];
int order_numerator_;
int order_denominator_;
int highest_order_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_POLE_ZERO_FILTER_H_
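A minimal sketch of the interface (hypothetical, for illustration): a one-pole smoother y[n] = 0.5 * x[n] + 0.5 * y[n-1], i.e. numerator {0.5} of order 0 and denominator {1, -0.5} of order 1:
#include "webrtc/modules/audio_processing/agc/pole_zero_filter.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"

void ExampleOnePoleSmoother() {
  const float kNumerator[] = {0.5f};           // b0; order 0.
  const float kDenominator[] = {1.0f, -0.5f};  // a0, a1; order 1.
  webrtc::scoped_ptr<webrtc::PoleZeroFilter> filter(
      webrtc::PoleZeroFilter::Create(kNumerator, 0, kDenominator, 1));
  const int16_t in[4] = {1000, 1000, 1000, 1000};
  float out[4];
  // The step response approaches the input: 500, 750, 875, 937.5.
  filter->Filter(in, 4, out);
}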

View File

@ -0,0 +1,98 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/agc/pole_zero_filter.h"
#include <math.h>
#include <stdio.h>
#include "gtest/gtest.h"
#include "webrtc/modules/audio_processing/agc/agc_audio_proc_internal.h"
#include "webrtc/system_wrappers/interface/compile_assert.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
#include "webrtc/test/testsupport/fileutils.h"
namespace webrtc {
static const int kInputSamples = 50;
static const int16_t kInput[kInputSamples] = {-2136, -7116, 10715, 2464, 3164,
8139, 11393, 24013, -32117, -5544, -27740, 10181, 14190, -24055, -15912,
17393, 6359, -9950, -13894, 32432, -23944, 3437, -8381, 19768, 3087, -19795,
-5920, 13310, 1407, 3876, 4059, 3524, -23130, 19121, -27900, -24840, 4089,
21422, -3625, 3015, -11236, 28856, 13424, 6571, -19761, -6361, 15821, -9469,
29727, 32229};
static const float kReferenceOutput[kInputSamples] = {-2082.230472f,
-6878.572941f, 10697.090871f, 2358.373952f, 2973.936512f, 7738.580650f,
10690.803213f, 22687.091576f, -32676.684717f, -5879.621684f, -27359.297432f,
10368.735888f, 13994.584604f, -23676.126249f, -15078.250390f, 17818.253338f,
6577.743123f, -9498.369315f, -13073.651079f, 32460.026588f, -23391.849347f,
3953.805667f, -7667.761363f, 19995.153447f, 3185.575477f, -19207.365160f,
-5143.103201f, 13756.317237f, 1779.654794f, 4142.269755f, 4209.475034f,
3572.991789f, -22509.089546f, 19307.878964f, -27060.439759f, -23319.042810f,
5547.685267f, 22312.718676f, -2707.309027f, 3852.358490f, -10135.510093f,
29241.509970f, 13394.397233f, 6340.721417f, -19510.207905f, -5908.442086f,
15882.301634f, -9211.335255f, 29253.056735f, 30874.443046f};
class PoleZeroFilterTest : public ::testing::Test {
protected:
PoleZeroFilterTest()
: my_filter_(PoleZeroFilter::Create(
kCoeffNumerator, kFilterOrder, kCoeffDenominator, kFilterOrder)) {}
~PoleZeroFilterTest() {}
void FilterSubframes(int num_subframes);
private:
void TestClean();
scoped_ptr<PoleZeroFilter> my_filter_;
};
void PoleZeroFilterTest::FilterSubframes(int num_subframes) {
float output[kInputSamples];
const int num_subframe_samples = kInputSamples / num_subframes;
EXPECT_EQ(num_subframe_samples * num_subframes, kInputSamples);
for (int n = 0; n < num_subframes; n++) {
my_filter_->Filter(&kInput[n * num_subframe_samples], num_subframe_samples,
&output[n * num_subframe_samples]);
}
for (int n = 0; n < kInputSamples; n++) {
EXPECT_NEAR(output[n], kReferenceOutput[n], 1);
}
}
TEST_F(PoleZeroFilterTest, OneSubframe) {
FilterSubframes(1);
}
TEST_F(PoleZeroFilterTest, TwoSubframes) {
FilterSubframes(2);
}
TEST_F(PoleZeroFilterTest, FiveSubframes) {
FilterSubframes(5);
}
TEST_F(PoleZeroFilterTest, TenSubframes) {
FilterSubframes(10);
}
TEST_F(PoleZeroFilterTest, TwentyFiveSubframes) {
FilterSubframes(25);
}
TEST_F(PoleZeroFilterTest, FiftySubframes) {
FilterSubframes(50);
}
} // namespace webrtc

View File

@ -0,0 +1,96 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/agc/standalone_vad.h"
#include <assert.h>
#include "webrtc/modules/interface/module_common_types.h"
#include "webrtc/modules/utility/interface/audio_frame_operations.h"
#include "webrtc/typedefs.h"
namespace webrtc {
static const int kDefaultStandaloneVadMode = 3;
StandaloneVad::StandaloneVad(VadInst* vad)
: vad_(vad),
buffer_(),
index_(0),
mode_(kDefaultStandaloneVadMode) {}
StandaloneVad::~StandaloneVad() {
WebRtcVad_Free(vad_);
}
StandaloneVad* StandaloneVad::Create() {
VadInst* vad = NULL;
if (WebRtcVad_Create(&vad) < 0)
return NULL;
int err = WebRtcVad_Init(vad);
err |= WebRtcVad_set_mode(vad, kDefaultStandaloneVadMode);
if (err != 0) {
WebRtcVad_Free(vad);
return NULL;
}
return new StandaloneVad(vad);
}
int StandaloneVad::AddAudio(const int16_t* data, int length) {
if (length != kLength10Ms)
return -1;
if (index_ + length > kLength10Ms * kMaxNum10msFrames)
// Reset the buffer if it's full.
// TODO(ajm): Instead, consider just processing every 10 ms frame. Then we
// can forgo the buffering.
index_ = 0;
memcpy(&buffer_[index_], data, sizeof(int16_t) * length);
index_ += length;
return 0;
}
int StandaloneVad::GetActivity(double* p, int length_p) {
if (index_ == 0)
return -1;
const int num_frames = index_ / kLength10Ms;
if (num_frames > length_p)
return -1;
assert(WebRtcVad_ValidRateAndFrameLength(kSampleRateHz, index_) == 0);
int activity = WebRtcVad_Process(vad_, kSampleRateHz, buffer_, index_);
if (activity < 0)
return -1;
else if (activity == 0)
p[0] = 0.01; // Arbitrary but small and non-zero.
else
p[0] = 0.5; // 0.5 is the neutral value when combined with other probabilities.
for (int n = 1; n < num_frames; n++)
p[n] = p[0];
// Reset the buffer to start from the beginning.
index_ = 0;
return activity;
}
int StandaloneVad::set_mode(int mode) {
if (mode < 0 || mode > 3)
return -1;
if (WebRtcVad_set_mode(vad_, mode) != 0)
return -1;
mode_ = mode;
return 0;
}
} // namespace webrtc

View File

@ -0,0 +1,70 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_STANDALONE_VAD_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_STANDALONE_VAD_H_
#include "webrtc/common_audio/vad/include/webrtc_vad.h"
#include "webrtc/modules/audio_processing/agc/common.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
#include "webrtc/typedefs.h"
namespace webrtc {
class AudioFrame;
class StandaloneVad {
public:
static StandaloneVad* Create();
~StandaloneVad();
// Outputs
// p: a buffer where probabilities are written to.
// length_p: number of elements of |p|.
//
// return value:
// -1: if no audio is stored or the VAD returns an error.
// Otherwise the VAD decision is returned: 0 for passive, 1 for active.
// In case of error the content of |p| is unchanged.
//
// Note that, due to a high false-positive rate (the VAD decision is active
// while the processed audio is just background noise), the stand-alone VAD is
// used as a one-sided indicator. The activity probability is 0.5 if the frame
// is classified as active, and 0.01 if it is classified as passive. In this
// way, when probabilities are combined, the effect of the stand-alone VAD is
// neutral if the input is classified as active.
int GetActivity(double* p, int length_p);
// Expecting 10 ms of 16 kHz audio to be pushed in.
int AddAudio(const int16_t* data, int length);
// Sets the aggressiveness of the VAD: 0 is the least aggressive and 3 the
// most aggressive mode. Returns -1 if the input is less than 0 or larger
// than 3, otherwise 0 is returned.
int set_mode(int mode);
// Gets the aggressiveness of the current VAD.
int mode() const { return mode_; }
private:
explicit StandaloneVad(VadInst* vad);
static const int kMaxNum10msFrames = 3;
// TODO(turajs): Is there a way to use scoped-pointer here?
VadInst* vad_;
int16_t buffer_[kMaxNum10msFrames * kLength10Ms];
int index_;
int mode_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_STANDALONE_VAD_H_
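For illustration, a minimal usage sketch of this interface (a sketch only: Read10MsFrame() is a hypothetical audio source, and the 160-sample frame length is inferred from the 16 kHz, 10 ms framing documented above):

  // Feed three 10 ms frames, then fetch one probability per frame.
  const int kLength10Ms = 160;  // 10 ms at 16 kHz, as in agc/common.h.
  webrtc::StandaloneVad* vad = webrtc::StandaloneVad::Create();
  int16_t frame[kLength10Ms];
  double p[3];
  for (int n = 0; n < 3; ++n) {
    Read10MsFrame(frame);               // Hypothetical 16 kHz audio source.
    vad->AddAudio(frame, kLength10Ms);  // Buffers up to 3 frames.
  }
  int activity = vad->GetActivity(p, 3);  // p[n] is 0.5 (active) or 0.01.
  delete vad;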

View File

@ -0,0 +1,103 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/agc/standalone_vad.h"
#include <string.h>
#include "gtest/gtest.h"
#include "webrtc/modules/interface/module_common_types.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
#include "webrtc/test/testsupport/fileutils.h"
namespace webrtc {
TEST(StandaloneVadTest, Api) {
scoped_ptr<StandaloneVad> vad(StandaloneVad::Create());
int16_t data[kLength10Ms] = { 0 };
// Valid frame length (for 32 kHz rate), but not what the VAD is expecting.
EXPECT_EQ(-1, vad->AddAudio(data, 320));
const int kMaxNumFrames = 3;
double p[kMaxNumFrames];
for (int n = 0; n < kMaxNumFrames; n++)
EXPECT_EQ(0, vad->AddAudio(data, kLength10Ms));
// Pretend |p| is shorter than it should be.
EXPECT_EQ(-1, vad->GetActivity(p, kMaxNumFrames - 1));
EXPECT_EQ(0, vad->GetActivity(p, kMaxNumFrames));
// Ask for activity when buffer is empty.
EXPECT_EQ(-1, vad->GetActivity(p, kMaxNumFrames));
// Should reset and result in one buffer.
for (int n = 0; n < kMaxNumFrames + 1; n++)
EXPECT_EQ(0, vad->AddAudio(data, kLength10Ms));
EXPECT_EQ(0, vad->GetActivity(p, 1));
// Wrong modes
EXPECT_EQ(-1, vad->set_mode(-1));
EXPECT_EQ(-1, vad->set_mode(4));
// Valid mode.
const int kMode = 2;
EXPECT_EQ(0, vad->set_mode(kMode));
EXPECT_EQ(kMode, vad->mode());
}
TEST(StandaloneVadTest, ActivityDetection) {
scoped_ptr<StandaloneVad> vad(StandaloneVad::Create());
const size_t kDataLength = kLength10Ms;
int16_t data[kDataLength] = { 0 };
FILE* pcm_file =
fopen(test::ResourcePath("audio_processing/agc/agc_audio", "pcm").c_str(),
"rb");
ASSERT_TRUE(pcm_file != NULL);
FILE* reference_file = fopen(
test::ResourcePath("audio_processing/agc/agc_vad", "dat").c_str(), "rb");
ASSERT_TRUE(reference_file != NULL);
// Reference activities are prepared with 0 aggressiveness.
ASSERT_EQ(0, vad->set_mode(0));
// Stand-alone VAD can operate on 1, 2 or 3 frames of length 10 ms. The
// reference file is created for 30 ms frames.
const int kNumVadFramesToProcess = 3;
int num_frames = 0;
while (fread(data, sizeof(int16_t), kDataLength, pcm_file) == kDataLength) {
vad->AddAudio(data, kDataLength);
num_frames++;
if (num_frames == kNumVadFramesToProcess) {
num_frames = 0;
int reference_activity;
double p[kNumVadFramesToProcess];
EXPECT_EQ(1u, fread(&reference_activity, sizeof(reference_activity), 1,
reference_file));
int activity = vad->GetActivity(p, kNumVadFramesToProcess);
EXPECT_EQ(reference_activity, activity);
if (activity != 0) {
// When active, probabilities are set to 0.5.
for (int n = 0; n < kNumVadFramesToProcess; n++)
EXPECT_EQ(0.5, p[n]);
} else {
// When inactive, probabilities are set to 0.01.
for (int n = 0; n < kNumVadFramesToProcess; n++)
EXPECT_EQ(0.01, p[n]);
}
}
}
fclose(reference_file);
fclose(pcm_file);
}
}  // namespace webrtc

View File

@ -0,0 +1,46 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_TEST_FAKE_AGC_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_TEST_FAKE_AGC_H_
#include "webrtc/modules/audio_processing/agc/agc.h"
namespace webrtc {
class FakeAgc : public Agc {
public:
FakeAgc()
: counter_(0),
volume_(kMaxVolume / 2) {
}
virtual int Process(const AudioFrame& audio_frame) {
const int kUpdateIntervalFrames = 10;
if (counter_ % kUpdateIntervalFrames == 0) {
volume_ = (volume_ + 1) % kMaxVolume;
}
counter_++;
return 0;
}
virtual int MicVolume() {
return volume_;
}
private:
// Declared at class scope so that the constructor can use it as well.
static const int kMaxVolume = 255;
int counter_;
int volume_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_TEST_FAKE_AGC_H_

View File

@ -0,0 +1,63 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/agc/test/test_utils.h"
#include <cmath>
#include <algorithm>
#include "webrtc/modules/interface/module_common_types.h"
namespace webrtc {
// Linearly maps a mic level in [0, 255] to a gain in dB, with level 127
// mapping to 0 dB and the end points to roughly +/- gain_range_db / 2.
float MicLevel2Gain(int gain_range_db, int level) {
return (level - 127.0f) / 128.0f * gain_range_db / 2;
}
float Db2Linear(float db) {
return powf(10.0f, db / 20.0f);
}
void ApplyGainLinear(float gain, float last_gain, AudioFrame* frame) {
const int frame_length = frame->samples_per_channel_ * frame->num_channels_;
// Smooth the transition between gain levels across the frame.
float smoothed_gain = last_gain;
float gain_step = (gain - last_gain) / (frame_length - 1);
for (int i = 0; i < frame_length; ++i) {
smoothed_gain += gain_step;
float sample = std::floor(frame->data_[i] * smoothed_gain + 0.5);
sample = std::max(std::min(32767.0f, sample), -32768.0f);
frame->data_[i] = static_cast<int16_t>(sample);
}
}
void ApplyGain(float gain_db, float last_gain_db, AudioFrame* frame) {
ApplyGainLinear(Db2Linear(gain_db), Db2Linear(last_gain_db), frame);
}
void SimulateMic(int gain_range_db, int mic_level, int last_mic_level,
AudioFrame* frame) {
assert(mic_level >= 0 && mic_level <= 255);
assert(last_mic_level >= 0 && last_mic_level <= 255);
ApplyGain(MicLevel2Gain(gain_range_db, mic_level),
MicLevel2Gain(gain_range_db, last_mic_level),
frame);
}
void SimulateMic(int gain_map[256], int mic_level, int last_mic_level,
AudioFrame* frame) {
assert(mic_level >= 0 && mic_level <= 255);
assert(last_mic_level >= 0 && last_mic_level <= 255);
ApplyGain(gain_map[mic_level], gain_map[last_mic_level], frame);
}
} // namespace webrtc

View File

@ -0,0 +1,28 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TEST_TEST_UTILS_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_TEST_TEST_UTILS_H_
namespace webrtc {
class AudioFrame;
float MicLevel2Gain(int gain_range_db, int level);
float Db2Linear(float db);
void ApplyGainLinear(float gain, float last_gain, AudioFrame* frame);
void ApplyGain(float gain_db, float last_gain_db, AudioFrame* frame);
void SimulateMic(int gain_range_db, int mic_level, int last_mic_level,
AudioFrame* frame);
void SimulateMic(int gain_map[256], int mic_level, int last_mic_level,
AudioFrame* frame);
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TEST_TEST_UTILS_H_
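A minimal sketch of how these helpers combine (illustrative only; the 80 dB gain range and the mic levels are example values, not taken from the change):

  webrtc::AudioFrame frame;  // Assume already filled with audio samples.
  // Simulate the mic volume moving from level 100 to 120, with the gain
  // change smoothed across the frame by ApplyGainLinear().
  webrtc::SimulateMic(80, 120, 100, &frame);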

View File

@ -0,0 +1,35 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/agc/utility.h"
#include <math.h>
static const double kLog10 = 2.30258509299; // Natural log of 10.
static const double kLinear2DbScale = 20.0 / kLog10;
static const double kLinear2LoudnessScale = 13.4 / kLog10;
double Loudness2Db(double loudness) {
return loudness * kLinear2DbScale / kLinear2LoudnessScale;
}
double Linear2Loudness(double rms) {
if (rms == 0)
return -15;
return kLinear2LoudnessScale * log(rms);
}
double Db2Loudness(double db) {
return db * kLinear2LoudnessScale / kLinear2DbScale;
}
double Dbfs2Loudness(double dbfs) {
return Db2Loudness(90 + dbfs);
}

View File

@ -0,0 +1,23 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_
// TODO(turajs): Add description of function.
double Loudness2Db(double loudness);
double Linear2Loudness(double rms);
double Db2Loudness(double db);
double Dbfs2Loudness(double dbfs);
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_
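As a quick sanity check of the scale factors in utility.cc (a sketch, not part of the change): all of these maps are linear in the log domain with ratio 20 : 13.4, so a dB-to-loudness round trip is the identity.

  #include <cassert>
  #include <cmath>
  #include "webrtc/modules/audio_processing/agc/utility.h"
  int main() {
    const double db = 20.0;
    const double loudness = Db2Loudness(db);  // 20 * 13.4 / 20 = 13.4.
    assert(std::fabs(Loudness2Db(loudness) - db) < 1e-9);
    // Dbfs2Loudness() maps 0 dBFS to a 90 dB sound level first, so:
    assert(std::fabs(Dbfs2Loudness(-30.0) - 40.2) < 1e-9);  // 60 * 13.4 / 20.
    return 0;
  }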

View File

@ -0,0 +1,77 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// GMM tables for active segments. Generated by MakeGmmTables.m.
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_VOICE_GMM_TABLES_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_VOICE_GMM_TABLES_H_
static const int kVoiceGmmNumMixtures = 12;
static const int kVoiceGmmDim = 3;
static const double kVoiceGmmCovarInverse[kVoiceGmmNumMixtures]
[kVoiceGmmDim][kVoiceGmmDim] = {
{{ 1.83673825579513e+00, -8.09791637570095e-04, 4.60106414365986e-03},
{-8.09791637570095e-04, 8.89351738394608e-04, -9.80188953277734e-04},
{ 4.60106414365986e-03, -9.80188953277734e-04, 1.38706060206582e-03}},
{{ 6.76228912850703e+01, -1.98893120119660e-02, -3.53548357253551e-03},
{-1.98893120119660e-02, 3.96216858500530e-05, -4.08492938394097e-05},
{-3.53548357253551e-03, -4.08492938394097e-05, 9.31864352856416e-04}},
{{ 9.98612435944558e+00, -5.27880954316893e-03, -6.30342541619017e-03},
{-5.27880954316893e-03, 4.54359480225226e-05, 6.30804591626044e-05},
{-6.30342541619017e-03, 6.30804591626044e-05, 5.36466441382942e-04}},
{{ 3.39917474216349e+01, -1.56213579433191e-03, -4.01459014990225e-02},
{-1.56213579433191e-03, 6.40415424897724e-05, 6.20076342427833e-05},
{-4.01459014990225e-02, 6.20076342427833e-05, 3.51199070103063e-03}},
{{ 1.34545062271428e+01, -7.94513610147144e-03, -5.34401019341728e-02},
{-7.94513610147144e-03, 1.16511820098649e-04, 4.66063702069293e-05},
{-5.34401019341728e-02, 4.66063702069293e-05, 2.72354323774163e-03}},
{{ 1.08557844314806e+02, -1.54885805673668e-02, -1.88029692674851e-02},
{-1.54885805673668e-02, 1.16404042786406e-04, 6.45579292702802e-06},
{-1.88029692674851e-02, 6.45579292702802e-06, 4.32330478391416e-04}},
{{ 8.22940066541450e+01, -1.15903110231303e-02, -4.92166764865343e-02},
{-1.15903110231303e-02, 7.42510742165261e-05, 3.73007314191290e-06},
{-4.92166764865343e-02, 3.73007314191290e-06, 3.64005221593244e-03}},
{{ 2.31133605685660e+00, -7.83261568950254e-04, 7.45744012346313e-04},
{-7.83261568950254e-04, 1.29460648214142e-05, -2.22774455093730e-06},
{ 7.45744012346313e-04, -2.22774455093730e-06, 1.05117294093010e-04}},
{{ 3.78767849189611e+02, 1.57759761011568e-03, -2.08551217988774e-02},
{ 1.57759761011568e-03, 4.76066236886865e-05, -2.33977412299324e-05},
{-2.08551217988774e-02, -2.33977412299324e-05, 5.24261005371196e-04}},
{{ 6.98580096506135e-01, -5.13850255217378e-04, -4.01124551717056e-04},
{-5.13850255217378e-04, 1.40501021984840e-06, -2.09496928716569e-06},
{-4.01124551717056e-04, -2.09496928716569e-06, 2.82879357740037e-04}},
{{ 2.62770945162399e+00, -2.31825753241430e-03, -5.30447217466318e-03},
{-2.31825753241430e-03, 4.59108572227649e-05, 7.67631886355405e-05},
{-5.30447217466318e-03, 7.67631886355405e-05, 2.28521601674098e-03}},
{{ 1.89940391362152e+02, -4.23280856852379e-03, -2.70608873541399e-02},
{-4.23280856852379e-03, 6.77547582742563e-05, 2.69154203800467e-05},
{-2.70608873541399e-02, 2.69154203800467e-05, 3.88574543373470e-03}}};
static const double kVoiceGmmMean[kVoiceGmmNumMixtures][kVoiceGmmDim] = {
{-2.15020241646536e+00, 4.97079062999877e+02, 4.77078119504505e+02},
{-8.92097680029190e-01, 5.92064964199921e+02, 1.81045145941059e+02},
{-1.29435784144398e+00, 4.98450293410611e+02, 1.71991263804064e+02},
{-1.03925228397884e+00, 4.99511274321571e+02, 1.05838336539105e+02},
{-1.29229047206129e+00, 4.15026762566707e+02, 1.12861119017125e+02},
{-7.88748114599810e-01, 4.48739336688113e+02, 1.89784216956337e+02},
{-8.77777402332642e-01, 4.86620285054533e+02, 1.13477708016491e+02},
{-2.06465957063057e+00, 6.33385049870607e+02, 2.32758546796149e+02},
{-6.98893789231685e-01, 5.93622051503385e+02, 1.92536982473203e+02},
{-2.55901217508894e+00, 1.55914919756205e+03, 1.39769980835570e+02},
{-1.92070024165837e+00, 4.87983940444185e+02, 1.02745468128289e+02},
{-7.29187507662854e-01, 5.22717685022855e+02, 1.16377942283991e+02}};
static const double kVoiceGmmWeights[kVoiceGmmNumMixtures] = {
-1.39789694361035e+01, -1.19527720202104e+01, -1.32396317929055e+01,
-1.09436815209238e+01, -1.13440027478149e+01, -1.12200721834504e+01,
-1.02537324043693e+01, -1.60789861938302e+01, -1.03394494048344e+01,
-1.83207938586818e+01, -1.31186044948288e+01, -9.52479998673554e+00};
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_VOICE_GMM_TABLES_H_

View File

@ -9,6 +9,7 @@
{
'variables': {
'audio_processing_dependencies': [
'<(DEPTH)/webrtc/modules/modules.gyp:iSAC',
'<(webrtc_root)/base/base.gyp:rtc_base_approved',
'<(webrtc_root)/common_audio/common_audio.gyp:common_audio',
'<(webrtc_root)/system_wrappers/source/system_wrappers.gyp:system_wrappers',
@ -33,25 +34,52 @@
'<@(audio_processing_dependencies)',
],
'sources': [
'aec/include/echo_cancellation.h',
'aec/aec_core.c',
'aec/aec_core.h',
'aec/aec_core_internal.h',
'aec/aec_rdft.c',
'aec/aec_rdft.h',
'aec/aec_resampler.c',
'aec/aec_resampler.h',
'aec/echo_cancellation.c',
'aec/echo_cancellation_internal.h',
'aec/aec_core.h',
'aec/aec_core.c',
'aec/aec_core_internal.h',
'aec/aec_rdft.h',
'aec/aec_rdft.c',
'aec/aec_resampler.h',
'aec/aec_resampler.c',
'aecm/include/echo_control_mobile.h',
'aecm/echo_control_mobile.c',
'aec/include/echo_cancellation.h',
'aecm/aecm_core.c',
'aecm/aecm_core.h',
'agc/include/gain_control.h',
'aecm/echo_control_mobile.c',
'aecm/include/echo_control_mobile.h',
'agc/agc.cc',
'agc/agc.h',
'agc/agc_audio_proc.cc',
'agc/agc_audio_proc.h',
'agc/agc_audio_proc_internal.h',
'agc/agc_manager_direct.cc',
'agc/agc_manager_direct.h',
'agc/analog_agc.c',
'agc/analog_agc.h',
'agc/circular_buffer.cc',
'agc/circular_buffer.h',
'agc/common.h',
'agc/digital_agc.c',
'agc/digital_agc.h',
'agc/gain_map_internal.h',
'agc/gmm.cc',
'agc/gmm.h',
'agc/histogram.cc',
'agc/histogram.h',
'agc/include/gain_control.h',
'agc/noise_gmm_tables.h',
'agc/pitch_based_vad.cc',
'agc/pitch_based_vad.h',
'agc/pitch_internal.cc',
'agc/pitch_internal.h',
'agc/pole_zero_filter.cc',
'agc/pole_zero_filter.h',
'agc/standalone_vad.cc',
'agc/standalone_vad.h',
'agc/utility.cc',
'agc/utility.h',
'agc/voice_gmm_tables.h',
'audio_buffer.cc',
'audio_buffer.h',
'audio_processing_impl.cc',
@ -74,10 +102,23 @@
'noise_suppression_impl.h',
'processing_component.cc',
'processing_component.h',
'splitting_filter.cc',
'splitting_filter.h',
'rms_level.cc',
'rms_level.h',
'splitting_filter.cc',
'splitting_filter.h',
'transient/common.h',
'transient/daubechies_8_wavelet_coeffs.h',
'transient/dyadic_decimator.h',
'transient/moving_moments.cc',
'transient/moving_moments.h',
'transient/transient_detector.cc',
'transient/transient_detector.h',
'transient/transient_suppressor.cc',
'transient/transient_suppressor.h',
'transient/wpd_node.cc',
'transient/wpd_node.h',
'transient/wpd_tree.cc',
'transient/wpd_tree.h',
'typing_detection.cc',
'typing_detection.h',
'utility/delay_estimator.c',

View File

@ -15,6 +15,8 @@
#include "webrtc/base/platform_file.h"
#include "webrtc/common_audio/include/audio_util.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_processing/agc/agc_manager_direct.h"
#include "webrtc/modules/audio_processing/transient/transient_suppressor.h"
#include "webrtc/modules/audio_processing/audio_buffer.h"
#include "webrtc/modules/audio_processing/channel_buffer.h"
#include "webrtc/modules/audio_processing/common.h"
@ -54,6 +56,85 @@ namespace webrtc {
// Throughout webrtc, it's assumed that success is represented by zero.
COMPILE_ASSERT(AudioProcessing::kNoError == 0, no_error_must_be_zero);
// This class has two main functionalities:
//
// 1) It is returned instead of the real GainControl after the new AGC has been
// enabled in order to prevent an outside user from overriding compression
// settings. It doesn't do anything in its implementation, except for
// delegating the const methods and Enable calls to the real GainControl, so
// AGC can still be disabled.
//
// 2) It is injected into AgcManagerDirect and implements volume callbacks for
// getting and setting the volume level. It just caches this value to be used
// in VoiceEngine later.
class GainControlForNewAgc : public GainControl, public VolumeCallbacks {
public:
explicit GainControlForNewAgc(GainControlImpl* gain_control)
: real_gain_control_(gain_control),
volume_(0) {
}
// GainControl implementation.
virtual int Enable(bool enable) OVERRIDE {
return real_gain_control_->Enable(enable);
}
virtual bool is_enabled() const OVERRIDE {
return real_gain_control_->is_enabled();
}
virtual int set_stream_analog_level(int level) OVERRIDE {
volume_ = level;
return AudioProcessing::kNoError;
}
virtual int stream_analog_level() OVERRIDE {
return volume_;
}
virtual int set_mode(Mode mode) OVERRIDE { return AudioProcessing::kNoError; }
virtual Mode mode() const OVERRIDE { return GainControl::kAdaptiveAnalog; }
virtual int set_target_level_dbfs(int level) OVERRIDE {
return AudioProcessing::kNoError;
}
virtual int target_level_dbfs() const OVERRIDE {
return real_gain_control_->target_level_dbfs();
}
virtual int set_compression_gain_db(int gain) OVERRIDE {
return AudioProcessing::kNoError;
}
virtual int compression_gain_db() const OVERRIDE {
return real_gain_control_->compression_gain_db();
}
virtual int enable_limiter(bool enable) OVERRIDE {
return AudioProcessing::kNoError;
}
virtual bool is_limiter_enabled() const OVERRIDE {
return real_gain_control_->is_limiter_enabled();
}
virtual int set_analog_level_limits(int minimum,
int maximum) OVERRIDE {
return AudioProcessing::kNoError;
}
virtual int analog_level_minimum() const OVERRIDE {
return real_gain_control_->analog_level_minimum();
}
virtual int analog_level_maximum() const OVERRIDE {
return real_gain_control_->analog_level_maximum();
}
virtual bool stream_is_saturated() const OVERRIDE {
return real_gain_control_->stream_is_saturated();
}
// VolumeCallbacks implementation.
virtual void SetMicVolume(int volume) OVERRIDE {
volume_ = volume;
}
virtual int GetMicVolume() OVERRIDE {
return volume_;
}
private:
GainControl* real_gain_control_;
int volume_;
};
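To make the two roles above concrete, a small sketch of the volume round trip through this adapter (illustrative only; in the real flow AgcManagerDirect drives the VolumeCallbacks side, and the levels are example values):

  // VoiceEngine reports the current mic level; the adapter only caches it.
  gain_control_for_new_agc->set_stream_analog_level(128);
  // The new AGC reads and adjusts the level through VolumeCallbacks.
  int level = gain_control_for_new_agc->GetMicVolume();  // Returns 128.
  gain_control_for_new_agc->SetMicVolume(200);
  // VoiceEngine later picks up the recommended level.
  level = gain_control_for_new_agc->stream_analog_level();  // Returns 200.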
AudioProcessing* AudioProcessing::Create(int id) {
return Create();
}
@ -96,7 +177,13 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config)
delay_offset_ms_(0),
was_stream_delay_set_(false),
output_will_be_muted_(false),
key_pressed_(false) {
key_pressed_(false),
#if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS)
use_new_agc_(false),
#else
use_new_agc_(config.Get<ExperimentalAgc>().enabled),
#endif
transient_suppressor_enabled_(config.Get<ExperimentalNs>().enabled) {
echo_cancellation_ = new EchoCancellationImpl(this, crit_);
component_list_.push_back(echo_cancellation_);
@ -118,12 +205,18 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config)
voice_detection_ = new VoiceDetectionImpl(this, crit_);
component_list_.push_back(voice_detection_);
gain_control_for_new_agc_.reset(new GainControlForNewAgc(gain_control_));
SetExtraOptions(config);
}
AudioProcessingImpl::~AudioProcessingImpl() {
{
CriticalSectionScoped crit_scoped(crit_);
// Depends on gain_control_ and gain_control_for_new_agc_.
agc_manager_.reset();
// Depends on gain_control_.
gain_control_for_new_agc_.reset();
while (!component_list_.empty()) {
ProcessingComponent* component = component_list_.front();
component->Destroy();
@ -192,6 +285,16 @@ int AudioProcessingImpl::InitializeLocked() {
}
}
int err = InitializeExperimentalAgc();
if (err != kNoError) {
return err;
}
err = InitializeTransient();
if (err != kNoError) {
return err;
}
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
if (debug_file_->Open()) {
int err = WriteInitMessage();
@ -303,6 +406,11 @@ void AudioProcessingImpl::SetExtraOptions(const Config& config) {
std::list<ProcessingComponent*>::iterator it;
for (it = component_list_.begin(); it != component_list_.end(); ++it)
(*it)->SetExtraOptions(config);
if (transient_suppressor_enabled_ != config.Get<ExperimentalNs>().enabled) {
transient_suppressor_enabled_ = config.Get<ExperimentalNs>().enabled;
InitializeTransient();
}
}
int AudioProcessingImpl::input_sample_rate_hz() const {
@ -337,6 +445,10 @@ int AudioProcessingImpl::num_output_channels() const {
void AudioProcessingImpl::set_output_will_be_muted(bool muted) {
output_will_be_muted_ = muted;
CriticalSectionScoped lock(crit_);
if (agc_manager_.get()) {
agc_manager_->SetCaptureMuted(output_will_be_muted_);
}
}
bool AudioProcessingImpl::output_will_be_muted() const {
@ -470,6 +582,12 @@ int AudioProcessingImpl::ProcessStreamLocked() {
#endif
AudioBuffer* ca = capture_audio_.get(); // For brevity.
if (use_new_agc_ && gain_control_->is_enabled()) {
agc_manager_->AnalyzePreProcess(ca->data(0),
ca->num_channels(),
fwd_proc_format_.samples_per_channel());
}
bool data_processed = is_data_processed();
if (analysis_needed(data_processed)) {
ca->SplitIntoFrequencyBands();
@ -486,12 +604,35 @@ int AudioProcessingImpl::ProcessStreamLocked() {
RETURN_ON_ERR(noise_suppression_->ProcessCaptureAudio(ca));
RETURN_ON_ERR(echo_control_mobile_->ProcessCaptureAudio(ca));
RETURN_ON_ERR(voice_detection_->ProcessCaptureAudio(ca));
if (use_new_agc_ && gain_control_->is_enabled()) {
agc_manager_->Process(ca->split_bands_const(0)[kBand0To8kHz],
ca->samples_per_split_channel(),
split_rate_);
}
RETURN_ON_ERR(gain_control_->ProcessCaptureAudio(ca));
if (synthesis_needed(data_processed)) {
ca->MergeFrequencyBands();
}
// TODO(aluebs): Investigate if the transient suppression placement should be
// before or after the AGC.
if (transient_suppressor_enabled_) {
float voice_probability =
agc_manager_.get() ? agc_manager_->voice_probability() : 1.f;
transient_suppressor_->Suppress(ca->data_f(0),
ca->samples_per_channel(),
ca->num_channels(),
ca->split_bands_const_f(0)[kBand0To8kHz],
ca->samples_per_split_channel(),
ca->keyboard_data(),
ca->samples_per_keyboard_channel(),
voice_probability,
key_pressed_);
}
// The level estimator operates on the recombined data.
RETURN_ON_ERR(level_estimator_->ProcessStream(ca));
@ -586,7 +727,9 @@ int AudioProcessingImpl::AnalyzeReverseStreamLocked() {
RETURN_ON_ERR(echo_cancellation_->ProcessRenderAudio(ra));
RETURN_ON_ERR(echo_control_mobile_->ProcessRenderAudio(ra));
RETURN_ON_ERR(gain_control_->ProcessRenderAudio(ra));
if (!use_new_agc_) {
RETURN_ON_ERR(gain_control_->ProcessRenderAudio(ra));
}
return kNoError;
}
@ -728,6 +871,9 @@ EchoControlMobile* AudioProcessingImpl::echo_control_mobile() const {
}
GainControl* AudioProcessingImpl::gain_control() const {
if (use_new_agc_) {
return gain_control_for_new_agc_.get();
}
return gain_control_;
}
@ -775,7 +921,7 @@ bool AudioProcessingImpl::is_data_processed() const {
bool AudioProcessingImpl::output_copy_needed(bool is_data_processed) const {
// Check if we've upmixed or downmixed the audio.
return ((fwd_out_format_.num_channels() != fwd_in_format_.num_channels()) ||
is_data_processed);
is_data_processed || transient_suppressor_enabled_);
}
bool AudioProcessingImpl::synthesis_needed(bool is_data_processed) const {
@ -784,7 +930,8 @@ bool AudioProcessingImpl::synthesis_needed(bool is_data_processed) const {
}
bool AudioProcessingImpl::analysis_needed(bool is_data_processed) const {
if (!is_data_processed && !voice_detection_->is_enabled()) {
if (!is_data_processed && !voice_detection_->is_enabled() &&
!transient_suppressor_enabled_) {
// Only level_estimator_ is enabled.
return false;
} else if (fwd_proc_format_.rate() == kSampleRate32kHz ||
@ -795,6 +942,30 @@ bool AudioProcessingImpl::analysis_needed(bool is_data_processed) const {
return false;
}
int AudioProcessingImpl::InitializeExperimentalAgc() {
if (use_new_agc_) {
if (!agc_manager_.get()) {
agc_manager_.reset(
new AgcManagerDirect(gain_control_, gain_control_for_new_agc_.get()));
}
agc_manager_->Initialize();
agc_manager_->SetCaptureMuted(output_will_be_muted_);
}
return kNoError;
}
int AudioProcessingImpl::InitializeTransient() {
if (transient_suppressor_enabled_) {
if (!transient_suppressor_.get()) {
transient_suppressor_.reset(new TransientSuppressor());
}
transient_suppressor_->Initialize(fwd_proc_format_.rate(),
split_rate_,
fwd_out_format_.num_channels());
}
return kNoError;
}
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
int AudioProcessingImpl::WriteMessageToDebugFile() {
int32_t size = event_msg_->ByteSize();

View File

@ -8,28 +8,32 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_PROCESSING_IMPL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_PROCESSING_IMPL_H_
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_PROCESSING_IMPL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_PROCESSING_IMPL_H_
#include "webrtc/modules/audio_processing/include/audio_processing.h"
#include <list>
#include <string>
#include "webrtc/base/thread_annotations.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
namespace webrtc {
class AgcManagerDirect;
class AudioBuffer;
class CriticalSectionWrapper;
class EchoCancellationImpl;
class EchoControlMobileImpl;
class FileWrapper;
class GainControlImpl;
class GainControlForNewAgc;
class HighPassFilterImpl;
class LevelEstimatorImpl;
class NoiseSuppressionImpl;
class ProcessingComponent;
class TransientSuppressor;
class VoiceDetectionImpl;
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
@ -138,7 +142,7 @@ class AudioProcessingImpl : public AudioProcessing {
protected:
// Overridden in a mock.
virtual int InitializeLocked();
virtual int InitializeLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_);
private:
int InitializeLocked(int input_sample_rate_hz,
@ -146,20 +150,24 @@ class AudioProcessingImpl : public AudioProcessing {
int reverse_sample_rate_hz,
int num_input_channels,
int num_output_channels,
int num_reverse_channels);
int num_reverse_channels)
EXCLUSIVE_LOCKS_REQUIRED(crit_);
int MaybeInitializeLocked(int input_sample_rate_hz,
int output_sample_rate_hz,
int reverse_sample_rate_hz,
int num_input_channels,
int num_output_channels,
int num_reverse_channels);
int ProcessStreamLocked();
int AnalyzeReverseStreamLocked();
int num_reverse_channels)
EXCLUSIVE_LOCKS_REQUIRED(crit_);
int ProcessStreamLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_);
int AnalyzeReverseStreamLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_);
bool is_data_processed() const;
bool output_copy_needed(bool is_data_processed) const;
bool synthesis_needed(bool is_data_processed) const;
bool analysis_needed(bool is_data_processed) const;
int InitializeExperimentalAgc() EXCLUSIVE_LOCKS_REQUIRED(crit_);
int InitializeTransient() EXCLUSIVE_LOCKS_REQUIRED(crit_);
EchoCancellationImpl* echo_cancellation_;
EchoControlMobileImpl* echo_control_mobile_;
@ -168,6 +176,7 @@ class AudioProcessingImpl : public AudioProcessing {
LevelEstimatorImpl* level_estimator_;
NoiseSuppressionImpl* noise_suppression_;
VoiceDetectionImpl* voice_detection_;
scoped_ptr<GainControlForNewAgc> gain_control_for_new_agc_;
std::list<ProcessingComponent*> component_list_;
CriticalSectionWrapper* crit_;
@ -199,8 +208,15 @@ class AudioProcessingImpl : public AudioProcessing {
bool output_will_be_muted_;
bool key_pressed_;
// Only set through the constructor's Config parameter.
const bool use_new_agc_;
scoped_ptr<AgcManagerDirect> agc_manager_ GUARDED_BY(crit_);
bool transient_suppressor_enabled_;
scoped_ptr<TransientSuppressor> transient_suppressor_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_PROCESSING_IMPL_H_
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_PROCESSING_IMPL_H_

View File

@ -27,7 +27,9 @@ class MockInitialize : public AudioProcessingImpl {
}
MOCK_METHOD0(InitializeLocked, int());
int RealInitializeLocked() { return AudioProcessingImpl::InitializeLocked(); }
int RealInitializeLocked() NO_THREAD_SAFETY_ANALYSIS {
return AudioProcessingImpl::InitializeLocked();
}
};
TEST(AudioProcessingImplTest, AudioParameterChangeTriggersInit) {

View File

@ -46,6 +46,33 @@
],
'sources': [ 'test/unpack.cc', ],
},
{
'target_name': 'transient_suppression_test',
'type': 'executable',
'dependencies': [
'<(DEPTH)/testing/gtest.gyp:gtest',
'<(DEPTH)/third_party/gflags/gflags.gyp:gflags',
'<(webrtc_root)/test/test.gyp:test_support',
'<(webrtc_root)/modules/modules.gyp:audio_processing',
],
'sources': [
'transient/transient_suppression_test.cc',
'transient/file_utils.cc',
'transient/file_utils.h',
],
}, # transient_suppression_test
{
'target_name': 'click_annotate',
'type': 'executable',
'dependencies': [
'<(webrtc_root)/modules/modules.gyp:audio_processing',
],
'sources': [
'transient/click_annotate.cc',
'transient/file_utils.cc',
'transient/file_utils.h',
],
}, # click_annotate
],
}],
],

View File

@ -0,0 +1,114 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <cfloat>
#include <cstdio>
#include <cstdlib>
#include <vector>
#include "webrtc/modules/audio_processing/transient/transient_detector.h"
#include "webrtc/modules/audio_processing/transient/file_utils.h"
#include "webrtc/system_wrappers/interface/file_wrapper.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
using webrtc::FileWrapper;
using webrtc::TransientDetector;
using webrtc::scoped_ptr;
// Application to generate an RTP timing file.
// Opens the PCM file and divides the signal in frames.
// Creates a send times array, one for each step.
// Each block that contains a transient has an infinite send time.
// The resultant array is written to a DAT file.
// Returns -1 on error or |lost_packets| otherwise.
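// Example invocation (hypothetical file names): annotate a 16 kHz recording
// in 10 ms chunks and write the send-times array to clicks.dat:
//   ./click_annotate recording.pcm clicks.dat 10 16000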
int main(int argc, char* argv[]) {
if (argc != 5) {
printf("\n%s - Application to generate a RTP timing file.\n\n", argv[0]);
printf("%s PCMfile DATfile chunkSize sampleRate\n\n", argv[0]);
printf("Opens the PCMfile with sampleRate in Hertz.\n");
printf("Creates a send times array, one for each chunkSize ");
printf("milliseconds step.\n");
printf("Each block that contains a transient, has an infinite send time. ");
printf("The resultant array is written to a DATfile.\n\n");
return 0;
}
scoped_ptr<FileWrapper> pcm_file(FileWrapper::Create());
pcm_file->OpenFile(argv[1], true, false, false);
if (!pcm_file->Open()) {
printf("\nThe %s could not be opened.\n\n", argv[1]);
return -1;
}
scoped_ptr<FileWrapper> dat_file(FileWrapper::Create());
dat_file->OpenFile(argv[2], false, false, false);
if (!dat_file->Open()) {
printf("\nThe %s could not be opened.\n\n", argv[2]);
return -1;
}
int chunk_size_ms = atoi(argv[3]);
if (chunk_size_ms <= 0) {
printf("\nThe chunkSize must be a positive integer\n\n");
return -1;
}
int sample_rate_hz = atoi(argv[4]);
if (sample_rate_hz <= 0) {
printf("\nThe sampleRate must be a positive integer\n\n");
return -1;
}
TransientDetector detector(sample_rate_hz);
int lost_packets = 0;
size_t audio_buffer_length = chunk_size_ms * sample_rate_hz / 1000;
scoped_ptr<float[]> audio_buffer(new float[audio_buffer_length]);
std::vector<float> send_times;
// Read first buffer from the PCM test file.
size_t file_samples_read = ReadInt16FromFileToFloatBuffer(
pcm_file.get(),
audio_buffer_length,
audio_buffer.get());
for (int time = 0; file_samples_read > 0; time += chunk_size_ms) {
// Pad the rest of the buffer with zeros.
for (size_t i = file_samples_read; i < audio_buffer_length; ++i) {
audio_buffer[i] = 0.0;
}
float value =
detector.Detect(audio_buffer.get(), audio_buffer_length, NULL, 0);
if (value < 0.5f) {
value = time;
} else {
value = FLT_MAX;
++lost_packets;
}
send_times.push_back(value);
// Read next buffer from the PCM test file.
file_samples_read = ReadInt16FromFileToFloatBuffer(pcm_file.get(),
audio_buffer_length,
audio_buffer.get());
}
size_t floats_written = WriteFloatBufferToFile(dat_file.get(),
send_times.size(),
&send_times[0]);
if (floats_written == 0) {
printf("\nThe send times could not be written to DAT file\n\n");
return -1;
}
pcm_file->CloseFile();
dat_file->CloseFile();
return lost_packets;
}

View File

@ -0,0 +1,27 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_COMMON_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_COMMON_H_
namespace webrtc {
namespace ts {
static const float kPi = 3.14159265358979323846f;
static const int kChunkSizeMs = 10;
enum {
kSampleRate8kHz = 8000,
kSampleRate16kHz = 16000,
kSampleRate32kHz = 32000,
kSampleRate48kHz = 48000
};
} // namespace ts
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_COMMON_H_

View File

@ -0,0 +1,63 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// This header file defines the coefficients of the FIR based approximation of
// the Daubechies 8 wavelet.
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DAUBECHIES_8_WAVELET_COEFFS_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DAUBECHIES_8_WAVELET_COEFFS_H_
// Decomposition coefficients Daubechies 8.
namespace webrtc {
const int kDaubechies8CoefficientsLength = 16;
const float kDaubechies8HighPassCoefficients[kDaubechies8CoefficientsLength]
= {
-5.44158422430816093862e-02f,
3.12871590914465924627e-01f,
-6.75630736298012846142e-01f,
5.85354683654869090148e-01f,
1.58291052560238926228e-02f,
-2.84015542962428091389e-01f,
-4.72484573997972536787e-04f,
1.28747426620186011803e-01f,
1.73693010020221083600e-02f,
-4.40882539310647192377e-02f,
-1.39810279170155156436e-02f,
8.74609404701565465445e-03f,
4.87035299301066034600e-03f,
-3.91740372995977108837e-04f,
-6.75449405998556772109e-04f,
-1.17476784002281916305e-04f
};
const float kDaubechies8LowPassCoefficients[kDaubechies8CoefficientsLength] = {
-1.17476784002281916305e-04f,
6.75449405998556772109e-04f,
-3.91740372995977108837e-04f,
-4.87035299301066034600e-03f,
8.74609404701565465445e-03f,
1.39810279170155156436e-02f,
-4.40882539310647192377e-02f,
-1.73693010020221083600e-02f,
1.28747426620186011803e-01f,
4.72484573997972536787e-04f,
-2.84015542962428091389e-01f,
-1.58291052560238926228e-02f,
5.85354683654869090148e-01f,
6.75630736298012846142e-01f,
3.12871590914465924627e-01f,
5.44158422430816093862e-02f
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DAUBECHIES_8_WAVELET_COEFFS_H_

View File

@ -0,0 +1,70 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DYADIC_DECIMATOR_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DYADIC_DECIMATOR_H_
#include <cstdlib>
#include "webrtc/typedefs.h"
// Provides a set of functions to perform dyadic decimations.
namespace webrtc {
// Returns the proper length of the output buffer that you should use for the
// given |in_length| and decimation |odd_sequence|.
inline size_t GetOutLengthToDyadicDecimate(size_t in_length,
bool odd_sequence) {
size_t out_length = in_length / 2;
if (in_length % 2 == 1 && !odd_sequence) {
++out_length;
}
return out_length;
}
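// For example, GetOutLengthToDyadicDecimate(5, false) == 3 (samples 0, 2, 4
// are kept), while GetOutLengthToDyadicDecimate(5, true) == 2 (samples 1, 3).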
// Performs a dyadic decimation: removes every odd/even member of a sequence
// halving its overall length.
// Arguments:
// in: array of |in_length|.
// odd_sequence: If false, the odd members will be removed (1, 3, 5, ...);
// if true, the even members will be removed (0, 2, 4, ...).
// out: array of |out_length|. |out_length| must be large enough to
// hold the decimated output. The necessary length can be provided by
// GetOutLengthToDyadicDecimate().
// Must be previously allocated.
// Returns the number of output samples, or 0 on error.
template<typename T>
static size_t DyadicDecimate(const T* in,
size_t in_length,
bool odd_sequence,
T* out,
size_t out_length) {
size_t half_length = GetOutLengthToDyadicDecimate(in_length, odd_sequence);
if (!in || !out || in_length <= 0 || out_length < half_length) {
return 0;
}
size_t output_samples = 0;
size_t index_adjustment = odd_sequence ? 1 : 0;
for (output_samples = 0; output_samples < half_length; ++output_samples) {
out[output_samples] = in[output_samples * 2 + index_adjustment];
}
return output_samples;
}
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DYADIC_DECIMATOR_H_

View File

@ -0,0 +1,126 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/transient/dyadic_decimator.h"
#include "testing/gtest/include/gtest/gtest.h"
namespace webrtc {
static const size_t kEvenBufferLength = 6;
static const size_t kOddBufferLength = 5;
static const size_t kOutBufferLength = 3;
int16_t const test_buffer_even_len[] = {0, 1, 2, 3, 4, 5};
int16_t const test_buffer_odd_len[] = {0, 1, 2, 3, 4};
int16_t test_buffer_out[kOutBufferLength];
TEST(DyadicDecimatorTest, GetOutLengthToDyadicDecimate) {
EXPECT_EQ(3u, GetOutLengthToDyadicDecimate(6, false));
EXPECT_EQ(3u, GetOutLengthToDyadicDecimate(6, true));
EXPECT_EQ(3u, GetOutLengthToDyadicDecimate(5, false));
EXPECT_EQ(2u, GetOutLengthToDyadicDecimate(5, true));
}
TEST(DyadicDecimatorTest, DyadicDecimateErrorValues) {
size_t out_samples = 0;
out_samples = DyadicDecimate(static_cast<int16_t*>(NULL),
kEvenBufferLength,
false, // Even sequence.
test_buffer_out,
kOutBufferLength);
EXPECT_EQ(0u, out_samples);
out_samples = DyadicDecimate(test_buffer_even_len,
kEvenBufferLength,
false, // Even sequence.
static_cast<int16_t*>(NULL),
kOutBufferLength);
EXPECT_EQ(0u, out_samples);
// Less than required |out_length|.
out_samples = DyadicDecimate(test_buffer_even_len,
kEvenBufferLength,
false, // Even sequence.
test_buffer_out,
2);
EXPECT_EQ(0u, out_samples);
}
TEST(DyadicDecimatorTest, DyadicDecimateEvenLengthEvenSequence) {
size_t expected_out_samples =
GetOutLengthToDyadicDecimate(kEvenBufferLength, false);
size_t out_samples = DyadicDecimate(test_buffer_even_len,
kEvenBufferLength,
false, // Even sequence.
test_buffer_out,
kOutBufferLength);
EXPECT_EQ(expected_out_samples, out_samples);
EXPECT_EQ(0, test_buffer_out[0]);
EXPECT_EQ(2, test_buffer_out[1]);
EXPECT_EQ(4, test_buffer_out[2]);
}
TEST(DyadicDecimatorTest, DyadicDecimateEvenLengthOddSequence) {
size_t expected_out_samples =
GetOutLengthToDyadicDecimate(kEvenBufferLength, true);
size_t out_samples = DyadicDecimate(test_buffer_even_len,
kEvenBufferLength,
true, // Odd sequence.
test_buffer_out,
kOutBufferLength);
EXPECT_EQ(expected_out_samples, out_samples);
EXPECT_EQ(1, test_buffer_out[0]);
EXPECT_EQ(3, test_buffer_out[1]);
EXPECT_EQ(5, test_buffer_out[2]);
}
TEST(DyadicDecimatorTest, DyadicDecimateOddLengthEvenSequence) {
size_t expected_out_samples =
GetOutLengthToDyadicDecimate(kOddBufferLength, false);
size_t out_samples = DyadicDecimate(test_buffer_odd_len,
kOddBufferLength,
false, // Even sequence.
test_buffer_out,
kOutBufferLength);
EXPECT_EQ(expected_out_samples, out_samples);
EXPECT_EQ(0, test_buffer_out[0]);
EXPECT_EQ(2, test_buffer_out[1]);
EXPECT_EQ(4, test_buffer_out[2]);
}
TEST(DyadicDecimatorTest, DyadicDecimateOddLengthOddSequence) {
size_t expected_out_samples =
GetOutLengthToDyadicDecimate(kOddBufferLength, true);
size_t out_samples = DyadicDecimate(test_buffer_odd_len,
kOddBufferLength,
true, // Odd sequence.
test_buffer_out,
kOutBufferLength);
EXPECT_EQ(expected_out_samples, out_samples);
EXPECT_EQ(1, test_buffer_out[0]);
EXPECT_EQ(3, test_buffer_out[1]);
}
} // namespace webrtc

View File

@ -0,0 +1,257 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/transient/file_utils.h"
#include "webrtc/system_wrappers/interface/file_wrapper.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
#include "webrtc/typedefs.h"
namespace webrtc {
int ConvertByteArrayToFloat(const uint8_t bytes[4], float* out) {
if (!bytes || !out) {
return -1;
}
uint32_t binary_value = 0;
for (int i = 3; i >= 0; --i) {
binary_value <<= 8;
binary_value += bytes[i];
}
*out = bit_cast<float>(binary_value);
return 0;
}
int ConvertByteArrayToDouble(const uint8_t bytes[8], double* out) {
if (!bytes || !out) {
return -1;
}
uint64_t binary_value = 0;
for (int i = 7; i >= 0; --i) {
binary_value <<= 8;
binary_value += bytes[i];
}
*out = bit_cast<double>(binary_value);
return 0;
}
int ConvertFloatToByteArray(float value, uint8_t out_bytes[4]) {
if (!out_bytes) {
return -1;
}
uint32_t binary_value = bit_cast<uint32_t>(value);
for (size_t i = 0; i < 4; ++i) {
out_bytes[i] = binary_value;
binary_value >>= 8;
}
return 0;
}
int ConvertDoubleToByteArray(double value, uint8_t out_bytes[8]) {
if (!out_bytes) {
return -1;
}
uint64_t binary_value = bit_cast<uint64_t>(value);
for (size_t i = 0; i < 8; ++i) {
out_bytes[i] = binary_value;
binary_value >>= 8;
}
return 0;
}
size_t ReadInt16BufferFromFile(FileWrapper* file,
size_t length,
int16_t* buffer) {
if (!file || !file->Open() || !buffer || length <= 0) {
return 0;
}
scoped_ptr<uint8_t[]> byte_array(new uint8_t[2]);
size_t int16s_read = 0;
while (int16s_read < length) {
size_t bytes_read = file->Read(byte_array.get(), 2);
if (bytes_read < 2) {
break;
}
int16_t value = byte_array[1];
value <<= 8;
value += byte_array[0];
buffer[int16s_read] = value;
++int16s_read;
}
return int16s_read;
}
size_t ReadInt16FromFileToFloatBuffer(FileWrapper* file,
size_t length,
float* buffer) {
if (!file || !file->Open() || !buffer || length <= 0) {
return 0;
}
scoped_ptr<int16_t[]> buffer16(new int16_t[length]);
size_t int16s_read = ReadInt16BufferFromFile(file, length, buffer16.get());
for (size_t i = 0; i < int16s_read; ++i) {
buffer[i] = buffer16[i];
}
return int16s_read;
}
size_t ReadInt16FromFileToDoubleBuffer(FileWrapper* file,
size_t length,
double* buffer) {
if (!file || !file->Open() || !buffer || length <= 0) {
return 0;
}
scoped_ptr<int16_t[]> buffer16(new int16_t[length]);
size_t int16s_read = ReadInt16BufferFromFile(file, length, buffer16.get());
for (size_t i = 0; i < int16s_read; ++i) {
buffer[i] = buffer16[i];
}
return int16s_read;
}
size_t ReadFloatBufferFromFile(FileWrapper* file,
size_t length,
float* buffer) {
if (!file || !file->Open() || !buffer || length <= 0) {
return 0;
}
scoped_ptr<uint8_t[]> byte_array(new uint8_t[4]);
size_t floats_read = 0;
while (floats_read < length) {
size_t bytes_read = file->Read(byte_array.get(), 4);
if (bytes_read < 4) {
break;
}
ConvertByteArrayToFloat(byte_array.get(), &buffer[floats_read]);
++floats_read;
}
return floats_read;
}
size_t ReadDoubleBufferFromFile(FileWrapper* file,
size_t length,
double* buffer) {
if (!file || !file->Open() || !buffer || length <= 0) {
return 0;
}
scoped_ptr<uint8_t[]> byte_array(new uint8_t[8]);
size_t doubles_read = 0;
while (doubles_read < length) {
size_t bytes_read = file->Read(byte_array.get(), 8);
if (bytes_read < 8) {
break;
}
ConvertByteArrayToDouble(byte_array.get(), &buffer[doubles_read]);
++doubles_read;
}
return doubles_read;
}
size_t WriteInt16BufferToFile(FileWrapper* file,
size_t length,
const int16_t* buffer) {
if (!file || !file->Open() || !buffer || length <= 0) {
return 0;
}
scoped_ptr<uint8_t[]> byte_array(new uint8_t[2]);
size_t int16s_written = 0;
for (int16s_written = 0; int16s_written < length; ++int16s_written) {
// Get byte representation.
byte_array[0] = buffer[int16s_written] & 0xFF;
byte_array[1] = (buffer[int16s_written] >> 8) & 0xFF;
file->Write(byte_array.get(), 2);
}
file->Flush();
return int16s_written;
}
size_t WriteFloatBufferToFile(FileWrapper* file,
size_t length,
const float* buffer) {
if (!file || !file->Open() || !buffer || length <= 0) {
return 0;
}
scoped_ptr<uint8_t[]> byte_array(new uint8_t[4]);
size_t floats_written = 0;
for (floats_written = 0; floats_written < length; ++floats_written) {
// Get byte representation.
ConvertFloatToByteArray(buffer[floats_written], byte_array.get());
file->Write(byte_array.get(), 4);
}
file->Flush();
return floats_written;
}
size_t WriteDoubleBufferToFile(FileWrapper* file,
size_t length,
const double* buffer) {
if (!file || !file->Open() || !buffer || length <= 0) {
return 0;
}
scoped_ptr<uint8_t[]> byte_array(new uint8_t[8]);
size_t doubles_written = 0;
for (doubles_written = 0; doubles_written < length; ++doubles_written) {
// Get byte representation.
ConvertDoubleToByteArray(buffer[doubles_written], byte_array.get());
file->Write(byte_array.get(), 8);
}
file->Flush();
return doubles_written;
}
} // namespace webrtc

View File

@ -0,0 +1,119 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_FILE_UTILS_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_FILE_UTILS_H_
#include <string.h>
#include "webrtc/base/compile_assert.h"
#include "webrtc/system_wrappers/interface/file_wrapper.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// This is a copy of the cast included in the Chromium codebase here:
// http://cs.chromium.org/src/third_party/cld/base/casts.h
template <class Dest, class Source>
inline Dest bit_cast(const Source& source) {
// A compile error here means your Dest and Source have different sizes.
COMPILE_ASSERT(sizeof(Dest) == sizeof(Source),
dest_and_source_have_different_sizes);
Dest dest;
memcpy(&dest, &source, sizeof(dest));
return dest;
}
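// For example (matching kPiBytesf in the unit tests), 0x40490FDB is the
// IEEE-754 bit pattern of float Pi, stored little-endian in the test files as
// {0xDB, 0x0F, 0x49, 0x40}:
//   float pi = bit_cast<float>(static_cast<uint32_t>(0x40490FDB));  // 3.14159274f.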
// Converts the byte array with binary float representation to float.
// Bytes must be in little-endian order.
// Returns 0 if correct, -1 on error.
int ConvertByteArrayToFloat(const uint8_t bytes[4], float* out);
// Converts the byte array with binary double representation to double.
// Bytes must be in little-endian order.
// Returns 0 if correct, -1 on error.
int ConvertByteArrayToDouble(const uint8_t bytes[8], double* out);
// Converts a float to a byte array with binary float representation.
// Bytes will be in little-endian order.
// Returns 0 if correct, -1 on error.
int ConvertFloatToByteArray(float value, uint8_t out_bytes[4]);
// Converts a double to a byte array with binary double representation.
// Bytes will be in little-endian order.
// Returns 0 if correct, -1 on error.
int ConvertDoubleToByteArray(double value, uint8_t out_bytes[8]);
// Reads |length| 16-bit integers from |file| to |buffer|.
// |file| must be previously opened.
// Returns the number of 16-bit integers read, or 0 on error.
size_t ReadInt16BufferFromFile(FileWrapper* file,
size_t length,
int16_t* buffer);
// Reads |length| 16-bit integers from |file| and stores those values
// (converted to float) in |buffer|.
// |file| must be previously opened.
// Returns the number of 16-bit integers read, or 0 on error.
size_t ReadInt16FromFileToFloatBuffer(FileWrapper* file,
size_t length,
float* buffer);
// Reads |length| 16-bit integers from |file| and stores those values
// (converted to double) in |buffer|.
// |file| must be previously opened.
// Returns the number of 16-bit integers read, or 0 on error.
size_t ReadInt16FromFileToDoubleBuffer(FileWrapper* file,
size_t length,
double* buffer);
// Reads |length| floats in binary representation (4 bytes) from |file| to
// |buffer|.
// |file| must be previously opened.
// Returns the number of floats read, or 0 on error.
size_t ReadFloatBufferFromFile(FileWrapper* file, size_t length, float* buffer);
// Reads |length| doubles in binary representation (8 bytes) from |file| to
// |buffer|.
// |file| must be previously opened.
// Returns the number of doubles read, or 0 on error.
size_t ReadDoubleBufferFromFile(FileWrapper* file,
size_t length,
double* buffer);
// Writes |length| 16-bit integers from |buffer| in binary representation (2
// bytes) to |file|. It flushes |file|, so after this call there are no
// writes pending.
// |file| must be previously opened.
// Returns the number of 16-bit integers written, or 0 on error.
size_t WriteInt16BufferToFile(FileWrapper* file,
size_t length,
const int16_t* buffer);
// Writes |length| floats from |buffer| in binary representation (4 bytes) to
// |file|. It flushes |file|, so after this call there are no writes pending.
// |file| must be previously opened.
// Returns the number of floats written, or 0 on error.
size_t WriteFloatBufferToFile(FileWrapper* file,
size_t length,
const float* buffer);
// Writes |length| doubles from |buffer| in binary representation (8 bytes) to
// |file|. It flushes |file|, so after this call there are no writes pending.
// |file| must be previously opened.
// Returns the number of doubles written, or 0 on error.
size_t WriteDoubleBufferToFile(FileWrapper* file,
size_t length,
const double* buffer);
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_FILE_UTILS_H_
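A sketch of a write/read round trip through these helpers (hypothetical file path; error checking omitted):

  webrtc::scoped_ptr<webrtc::FileWrapper> file(webrtc::FileWrapper::Create());
  file->OpenFile("/tmp/buffer.dat", false, false, false);  // Write mode.
  const float values[3] = {1.0f, 2.0f, 3.0f};
  webrtc::WriteFloatBufferToFile(file.get(), 3, values);
  file->CloseFile();
  file->OpenFile("/tmp/buffer.dat", true, false, false);  // Read only, no loop.
  float read_back[3];
  webrtc::ReadFloatBufferFromFile(file.get(), 3, read_back);
  file->CloseFile();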

View File

@ -0,0 +1,484 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/transient/file_utils.h"
#include <string.h>
#include <string>
#include "testing/gtest/include/gtest/gtest.h"
#include "webrtc/system_wrappers/interface/file_wrapper.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
#include "webrtc/test/testsupport/fileutils.h"
#include "webrtc/typedefs.h"
namespace webrtc {
static const uint8_t kPiBytesf[4] = {0xDB, 0x0F, 0x49, 0x40};
static const uint8_t kEBytesf[4] = {0x54, 0xF8, 0x2D, 0x40};
static const uint8_t kAvogadroBytesf[4] = {0x2F, 0x0C, 0xFF, 0x66};
static const uint8_t kPiBytes[8] =
{0x18, 0x2D, 0x44, 0x54, 0xFB, 0x21, 0x09, 0x40};
static const uint8_t kEBytes[8] =
{0x69, 0x57, 0x14, 0x8B, 0x0A, 0xBF, 0x05, 0x40};
static const uint8_t kAvogadroBytes[8] =
{0xF4, 0xBC, 0xA8, 0xDF, 0x85, 0xE1, 0xDF, 0x44};
static const double kPi = 3.14159265358979323846;
static const double kE = 2.71828182845904523536;
static const double kAvogadro = 602214100000000000000000.0;
class TransientFileUtilsTest: public ::testing::Test {
protected:
TransientFileUtilsTest()
: kTestFileName(
test::ResourcePath("audio_processing/transient/double-utils",
"dat")),
kTestFileNamef(
test::ResourcePath("audio_processing/transient/float-utils",
"dat")) {}
// This file (used in some tests) contains binary data. The data correspond
// to the double representation of the constants Pi, E, and Avogadro's
// number, appended in that order.
const std::string kTestFileName;
// This file (used in some tests) contains binary data. The data correspond
// to the float representation of the constants Pi, E, and Avogadro's
// number, appended in that order.
const std::string kTestFileNamef;
};
TEST_F(TransientFileUtilsTest, ConvertByteArrayToFloat) {
float value = 0.0;
EXPECT_EQ(0, ConvertByteArrayToFloat(kPiBytesf, &value));
EXPECT_FLOAT_EQ(kPi, value);
EXPECT_EQ(0, ConvertByteArrayToFloat(kEBytesf, &value));
EXPECT_FLOAT_EQ(kE, value);
EXPECT_EQ(0, ConvertByteArrayToFloat(kAvogadroBytesf, &value));
EXPECT_FLOAT_EQ(kAvogadro, value);
}
TEST_F(TransientFileUtilsTest, ConvertByteArrayToDouble) {
double value = 0.0;
EXPECT_EQ(0, ConvertByteArrayToDouble(kPiBytes, &value));
EXPECT_DOUBLE_EQ(kPi, value);
EXPECT_EQ(0, ConvertByteArrayToDouble(kEBytes, &value));
EXPECT_DOUBLE_EQ(kE, value);
EXPECT_EQ(0, ConvertByteArrayToDouble(kAvogadroBytes, &value));
EXPECT_DOUBLE_EQ(kAvogadro, value);
}
TEST_F(TransientFileUtilsTest, ConvertFloatToByteArray) {
scoped_ptr<uint8_t[]> bytes(new uint8_t[4]);
EXPECT_EQ(0, ConvertFloatToByteArray(kPi, bytes.get()));
EXPECT_EQ(0, memcmp(bytes.get(), kPiBytesf, 4));
EXPECT_EQ(0, ConvertFloatToByteArray(kE, bytes.get()));
EXPECT_EQ(0, memcmp(bytes.get(), kEBytesf, 4));
EXPECT_EQ(0, ConvertFloatToByteArray(kAvogadro, bytes.get()));
EXPECT_EQ(0, memcmp(bytes.get(), kAvogadroBytesf, 4));
}
TEST_F(TransientFileUtilsTest, ConvertDoubleToByteArray) {
scoped_ptr<uint8_t[]> bytes(new uint8_t[8]);
EXPECT_EQ(0, ConvertDoubleToByteArray(kPi, bytes.get()));
EXPECT_EQ(0, memcmp(bytes.get(), kPiBytes, 8));
EXPECT_EQ(0, ConvertDoubleToByteArray(kE, bytes.get()));
EXPECT_EQ(0, memcmp(bytes.get(), kEBytes, 8));
EXPECT_EQ(0, ConvertDoubleToByteArray(kAvogadro, bytes.get()));
EXPECT_EQ(0, memcmp(bytes.get(), kAvogadroBytes, 8));
}
TEST_F(TransientFileUtilsTest, ReadInt16BufferFromFile) {
std::string test_filename = kTestFileName;
scoped_ptr<FileWrapper> file(FileWrapper::Create());
file->OpenFile(test_filename.c_str(),
true, // Read only.
true, // Loop.
false); // No text.
ASSERT_TRUE(file->Open()) << "File could not be opened:\n"
<< kTestFileName.c_str();
const size_t kBufferLength = 12;
scoped_ptr<int16_t[]> buffer(new int16_t[kBufferLength]);
EXPECT_EQ(kBufferLength, ReadInt16BufferFromFile(file.get(),
kBufferLength,
buffer.get()));
EXPECT_EQ(22377, buffer[4]);
EXPECT_EQ(16389, buffer[7]);
EXPECT_EQ(17631, buffer[kBufferLength - 1]);
file->Rewind();
// The next test checks the case where the file contains fewer samples than
// requested; the read stops at the end of the file and returns the number of
// int16s actually read.
const size_t kBufferLengthLargerThanFile = kBufferLength * 2;
buffer.reset(new int16_t[kBufferLengthLargerThanFile]);
EXPECT_EQ(kBufferLength, ReadInt16BufferFromFile(file.get(),
kBufferLengthLargerThanFile,
buffer.get()));
EXPECT_EQ(11544, buffer[0]);
EXPECT_EQ(22377, buffer[4]);
EXPECT_EQ(16389, buffer[7]);
EXPECT_EQ(17631, buffer[kBufferLength - 1]);
}
TEST_F(TransientFileUtilsTest, ReadInt16FromFileToFloatBuffer) {
std::string test_filename = kTestFileName;
scoped_ptr<FileWrapper> file(FileWrapper::Create());
file->OpenFile(test_filename.c_str(),
true, // Read only.
true, // Loop.
false); // No text.
ASSERT_TRUE(file->Open()) << "File could not be opened:\n"
<< kTestFileName.c_str();
const size_t kBufferLength = 12;
scoped_ptr<float[]> buffer(new float[kBufferLength]);
EXPECT_EQ(kBufferLength, ReadInt16FromFileToFloatBuffer(file.get(),
kBufferLength,
buffer.get()));
EXPECT_DOUBLE_EQ(11544, buffer[0]);
EXPECT_DOUBLE_EQ(22377, buffer[4]);
EXPECT_DOUBLE_EQ(16389, buffer[7]);
EXPECT_DOUBLE_EQ(17631, buffer[kBufferLength - 1]);
file->Rewind();
  // The next test checks the case where the file contains less data than
  // requested; the function reads to the end and returns the number of int16s
  // actually read.
  const size_t kBufferLengthLargerThanFile = kBufferLength * 2;
  buffer.reset(new float[kBufferLengthLargerThanFile]);
  EXPECT_EQ(kBufferLength,
            ReadInt16FromFileToFloatBuffer(file.get(),
                                           kBufferLengthLargerThanFile,
                                           buffer.get()));
EXPECT_DOUBLE_EQ(11544, buffer[0]);
EXPECT_DOUBLE_EQ(22377, buffer[4]);
EXPECT_DOUBLE_EQ(16389, buffer[7]);
EXPECT_DOUBLE_EQ(17631, buffer[kBufferLength - 1]);
}
TEST_F(TransientFileUtilsTest, ReadInt16FromFileToDoubleBuffer) {
std::string test_filename = kTestFileName;
scoped_ptr<FileWrapper> file(FileWrapper::Create());
file->OpenFile(test_filename.c_str(),
true, // Read only.
true, // Loop.
false); // No text.
ASSERT_TRUE(file->Open()) << "File could not be opened:\n"
<< kTestFileName.c_str();
const size_t kBufferLength = 12;
scoped_ptr<double[]> buffer(new double[kBufferLength]);
EXPECT_EQ(kBufferLength, ReadInt16FromFileToDoubleBuffer(file.get(),
kBufferLength,
buffer.get()));
EXPECT_DOUBLE_EQ(11544, buffer[0]);
EXPECT_DOUBLE_EQ(22377, buffer[4]);
EXPECT_DOUBLE_EQ(16389, buffer[7]);
EXPECT_DOUBLE_EQ(17631, buffer[kBufferLength - 1]);
file->Rewind();
  // The next test checks the case where the file contains less data than
  // requested; the function reads to the end and returns the number of int16s
  // actually read.
  const size_t kBufferLengthLargerThanFile = kBufferLength * 2;
  buffer.reset(new double[kBufferLengthLargerThanFile]);
  EXPECT_EQ(kBufferLength,
            ReadInt16FromFileToDoubleBuffer(file.get(),
                                            kBufferLengthLargerThanFile,
                                            buffer.get()));
EXPECT_DOUBLE_EQ(11544, buffer[0]);
EXPECT_DOUBLE_EQ(22377, buffer[4]);
EXPECT_DOUBLE_EQ(16389, buffer[7]);
EXPECT_DOUBLE_EQ(17631, buffer[kBufferLength - 1]);
}
TEST_F(TransientFileUtilsTest, ReadFloatBufferFromFile) {
std::string test_filename = kTestFileNamef;
scoped_ptr<FileWrapper> file(FileWrapper::Create());
file->OpenFile(test_filename.c_str(),
true, // Read only.
true, // Loop.
false); // No text.
ASSERT_TRUE(file->Open()) << "File could not be opened:\n"
<< kTestFileNamef.c_str();
const size_t kBufferLength = 3;
scoped_ptr<float[]> buffer(new float[kBufferLength]);
EXPECT_EQ(kBufferLength, ReadFloatBufferFromFile(file.get(),
kBufferLength,
buffer.get()));
EXPECT_FLOAT_EQ(kPi, buffer[0]);
EXPECT_FLOAT_EQ(kE, buffer[1]);
EXPECT_FLOAT_EQ(kAvogadro, buffer[2]);
file->Rewind();
  // The next test checks the case where the file contains less data than
  // requested; the function reads to the end and returns the number of floats
  // actually read.
  const size_t kBufferLengthLargerThanFile = kBufferLength * 2;
  buffer.reset(new float[kBufferLengthLargerThanFile]);
  EXPECT_EQ(kBufferLength, ReadFloatBufferFromFile(file.get(),
                                                   kBufferLengthLargerThanFile,
                                                   buffer.get()));
EXPECT_FLOAT_EQ(kPi, buffer[0]);
EXPECT_FLOAT_EQ(kE, buffer[1]);
EXPECT_FLOAT_EQ(kAvogadro, buffer[2]);
}
TEST_F(TransientFileUtilsTest, ReadDoubleBufferFromFile) {
std::string test_filename = kTestFileName;
scoped_ptr<FileWrapper> file(FileWrapper::Create());
file->OpenFile(test_filename.c_str(),
true, // Read only.
true, // Loop.
false); // No text.
ASSERT_TRUE(file->Open()) << "File could not be opened:\n"
<< kTestFileName.c_str();
const size_t kBufferLength = 3;
scoped_ptr<double[]> buffer(new double[kBufferLength]);
EXPECT_EQ(kBufferLength, ReadDoubleBufferFromFile(file.get(),
kBufferLength,
buffer.get()));
EXPECT_DOUBLE_EQ(kPi, buffer[0]);
EXPECT_DOUBLE_EQ(kE, buffer[1]);
EXPECT_DOUBLE_EQ(kAvogadro, buffer[2]);
file->Rewind();
  // The next test checks the case where the file contains less data than
  // requested; the function reads to the end and returns the number of doubles
  // actually read.
  const size_t kBufferLengthLargerThanFile = kBufferLength * 2;
  buffer.reset(new double[kBufferLengthLargerThanFile]);
  EXPECT_EQ(kBufferLength, ReadDoubleBufferFromFile(file.get(),
                                                    kBufferLengthLargerThanFile,
                                                    buffer.get()));
EXPECT_DOUBLE_EQ(kPi, buffer[0]);
EXPECT_DOUBLE_EQ(kE, buffer[1]);
EXPECT_DOUBLE_EQ(kAvogadro, buffer[2]);
}
TEST_F(TransientFileUtilsTest, WriteInt16BufferToFile) {
scoped_ptr<FileWrapper> file(FileWrapper::Create());
std::string kOutFileName = test::OutputPath() + "utils_test.out";
file->OpenFile(kOutFileName.c_str(),
false, // Write mode.
false, // No loop.
false); // No text.
ASSERT_TRUE(file->Open()) << "File could not be opened:\n"
<< kOutFileName.c_str();
const size_t kBufferLength = 3;
scoped_ptr<int16_t[]> written_buffer(new int16_t[kBufferLength]);
scoped_ptr<int16_t[]> read_buffer(new int16_t[kBufferLength]);
written_buffer[0] = 1;
written_buffer[1] = 2;
written_buffer[2] = 3;
EXPECT_EQ(kBufferLength, WriteInt16BufferToFile(file.get(),
kBufferLength,
written_buffer.get()));
file->CloseFile();
file->OpenFile(kOutFileName.c_str(),
true, // Read only.
false, // No loop.
false); // No text.
ASSERT_TRUE(file->Open()) << "File could not be opened:\n"
<< kOutFileName.c_str();
EXPECT_EQ(kBufferLength, ReadInt16BufferFromFile(file.get(),
kBufferLength,
read_buffer.get()));
EXPECT_EQ(0, memcmp(written_buffer.get(),
read_buffer.get(),
kBufferLength * sizeof(written_buffer[0])));
}
TEST_F(TransientFileUtilsTest, WriteFloatBufferToFile) {
scoped_ptr<FileWrapper> file(FileWrapper::Create());
std::string kOutFileName = test::OutputPath() + "utils_test.out";
file->OpenFile(kOutFileName.c_str(),
false, // Write mode.
false, // No loop.
false); // No text.
ASSERT_TRUE(file->Open()) << "File could not be opened:\n"
<< kOutFileName.c_str();
const size_t kBufferLength = 3;
scoped_ptr<float[]> written_buffer(new float[kBufferLength]);
scoped_ptr<float[]> read_buffer(new float[kBufferLength]);
written_buffer[0] = kPi;
written_buffer[1] = kE;
written_buffer[2] = kAvogadro;
EXPECT_EQ(kBufferLength, WriteFloatBufferToFile(file.get(),
kBufferLength,
written_buffer.get()));
file->CloseFile();
file->OpenFile(kOutFileName.c_str(),
true, // Read only.
false, // No loop.
false); // No text.
ASSERT_TRUE(file->Open()) << "File could not be opened:\n"
<< kOutFileName.c_str();
EXPECT_EQ(kBufferLength, ReadFloatBufferFromFile(file.get(),
kBufferLength,
read_buffer.get()));
EXPECT_EQ(0, memcmp(written_buffer.get(),
read_buffer.get(),
kBufferLength * sizeof(written_buffer[0])));
}
TEST_F(TransientFileUtilsTest, WriteDoubleBufferToFile) {
scoped_ptr<FileWrapper> file(FileWrapper::Create());
std::string kOutFileName = test::OutputPath() + "utils_test.out";
file->OpenFile(kOutFileName.c_str(),
false, // Write mode.
false, // No loop.
false); // No text.
ASSERT_TRUE(file->Open()) << "File could not be opened:\n"
<< kOutFileName.c_str();
const size_t kBufferLength = 3;
scoped_ptr<double[]> written_buffer(new double[kBufferLength]);
scoped_ptr<double[]> read_buffer(new double[kBufferLength]);
written_buffer[0] = kPi;
written_buffer[1] = kE;
written_buffer[2] = kAvogadro;
EXPECT_EQ(kBufferLength, WriteDoubleBufferToFile(file.get(),
kBufferLength,
written_buffer.get()));
file->CloseFile();
file->OpenFile(kOutFileName.c_str(),
true, // Read only.
false, // No loop.
false); // No text.
ASSERT_TRUE(file->Open()) << "File could not be opened:\n"
<< kOutFileName.c_str();
EXPECT_EQ(kBufferLength, ReadDoubleBufferFromFile(file.get(),
kBufferLength,
read_buffer.get()));
EXPECT_EQ(0, memcmp(written_buffer.get(),
read_buffer.get(),
kBufferLength * sizeof(written_buffer[0])));
}
TEST_F(TransientFileUtilsTest, ExpectedErrorReturnValues) {
std::string test_filename = kTestFileName;
double value;
scoped_ptr<int16_t[]> int16_buffer(new int16_t[1]);
scoped_ptr<double[]> double_buffer(new double[1]);
scoped_ptr<FileWrapper> file(FileWrapper::Create());
EXPECT_EQ(-1, ConvertByteArrayToDouble(NULL, &value));
EXPECT_EQ(-1, ConvertByteArrayToDouble(kPiBytes, NULL));
EXPECT_EQ(-1, ConvertDoubleToByteArray(kPi, NULL));
// Tests with file not opened.
EXPECT_EQ(0u, ReadInt16BufferFromFile(file.get(), 1, int16_buffer.get()));
EXPECT_EQ(0u, ReadInt16FromFileToDoubleBuffer(file.get(),
1,
double_buffer.get()));
EXPECT_EQ(0u, ReadDoubleBufferFromFile(file.get(), 1, double_buffer.get()));
EXPECT_EQ(0u, WriteInt16BufferToFile(file.get(), 1, int16_buffer.get()));
EXPECT_EQ(0u, WriteDoubleBufferToFile(file.get(), 1, double_buffer.get()));
file->OpenFile(test_filename.c_str(),
true, // Read only.
true, // Loop.
false); // No text.
ASSERT_TRUE(file->Open()) << "File could not be opened:\n"
<< kTestFileName.c_str();
EXPECT_EQ(0u, ReadInt16BufferFromFile(NULL, 1, int16_buffer.get()));
EXPECT_EQ(0u, ReadInt16BufferFromFile(file.get(), 1, NULL));
EXPECT_EQ(0u, ReadInt16BufferFromFile(file.get(), 0, int16_buffer.get()));
EXPECT_EQ(0u, ReadInt16FromFileToDoubleBuffer(NULL, 1, double_buffer.get()));
EXPECT_EQ(0u, ReadInt16FromFileToDoubleBuffer(file.get(), 1, NULL));
EXPECT_EQ(0u, ReadInt16FromFileToDoubleBuffer(file.get(),
0,
double_buffer.get()));
EXPECT_EQ(0u, ReadDoubleBufferFromFile(NULL, 1, double_buffer.get()));
EXPECT_EQ(0u, ReadDoubleBufferFromFile(file.get(), 1, NULL));
EXPECT_EQ(0u, ReadDoubleBufferFromFile(file.get(), 0, double_buffer.get()));
EXPECT_EQ(0u, WriteInt16BufferToFile(NULL, 1, int16_buffer.get()));
EXPECT_EQ(0u, WriteInt16BufferToFile(file.get(), 1, NULL));
EXPECT_EQ(0u, WriteInt16BufferToFile(file.get(), 0, int16_buffer.get()));
EXPECT_EQ(0u, WriteDoubleBufferToFile(NULL, 1, double_buffer.get()));
EXPECT_EQ(0u, WriteDoubleBufferToFile(file.get(), 1, NULL));
EXPECT_EQ(0u, WriteDoubleBufferToFile(file.get(), 0, double_buffer.get()));
}
} // namespace webrtc


@ -0,0 +1,49 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/transient/moving_moments.h"
#include <math.h>
#include <string.h>
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
namespace webrtc {
MovingMoments::MovingMoments(size_t length)
: length_(length),
queue_(),
sum_(0.0),
sum_of_squares_(0.0) {
assert(length > 0);
for (size_t i = 0; i < length; ++i) {
queue_.push(0.0);
}
}
MovingMoments::~MovingMoments() {}
void MovingMoments::CalculateMoments(const float* in, size_t in_length,
float* first, float* second) {
assert(in && in_length > 0 && first && second);
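  // The queue holds the sliding window: for each new sample the running sum
  // and sum of squares are updated in O(1) by subtracting the value that
  // leaves the window and adding the one that enters it.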
for (size_t i = 0; i < in_length; ++i) {
const float old_value = queue_.front();
queue_.pop();
queue_.push(in[i]);
sum_ += in[i] - old_value;
sum_of_squares_ += in[i] * in[i] - old_value * old_value;
first[i] = sum_ / length_;
second[i] = sum_of_squares_ / length_;
}
}
} // namespace webrtc


@ -0,0 +1,52 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_MOVING_MOMENTS_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_MOVING_MOMENTS_H_
#include <queue>
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
namespace webrtc {
// Calculates the first and second moments for each value of a buffer, taking
// into account a given number of previous values.
// It preserves its state, so it can be called multiple times.
// TODO(chadan): When needed, implement a function that takes buffers of first
// and second moments and calculates the variances.
// TODO(chadan): When needed, add functionality to update with a buffer but
// output only the last values of the moments.
class MovingMoments {
public:
  // Creates a MovingMoments object that uses the last |length| values
  // (including the new value introduced in every new calculation).
explicit MovingMoments(size_t length);
~MovingMoments();
  // Calculates the new values using |in|. Results are written to the output
  // buffers. |first| and |second| must have room for at least |in_length|
  // values.
void CalculateMoments(const float* in, size_t in_length,
float* first, float* second);
private:
size_t length_;
// A queue holding the |length_| latest input values.
std::queue<float> queue_;
// Sum of the values of the queue.
float sum_;
// Sum of the squares of the values of the queue.
float sum_of_squares_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_MOVING_MOMENTS_H_
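For reference, a minimal usage sketch (illustrative only, not part of this
change; the function name is a placeholder): computing the running mean and
mean square of a short signal over a 5-sample window.

#include "webrtc/modules/audio_processing/transient/moving_moments.h"

void MovingMomentsExample() {
  webrtc::MovingMoments moments(5);  // Window of the last 5 values.
  const float input[4] = {1.f, 2.f, 3.f, 4.f};
  float mean[4];
  float mean_squares[4];
  // mean[i] and mean_squares[i] cover input[i] and the 4 preceding values;
  // the window starts zero-filled.
  moments.CalculateMoments(input, 4, mean, mean_squares);
}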


@ -0,0 +1,206 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/transient/moving_moments.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
namespace webrtc {
static const float kTolerance = 0.0001f;
class MovingMomentsTest : public ::testing::Test {
protected:
static const size_t kMovingMomentsBufferLength = 5;
  static const size_t kMaxOutputLength = 20;  // Valid for these tests only.
virtual void SetUp();
// Calls CalculateMoments and verifies that it produces the expected
// outputs.
void CalculateMomentsAndVerify(const float* input, size_t input_length,
const float* expected_mean,
const float* expected_mean_squares);
scoped_ptr<MovingMoments> moving_moments_;
float output_mean_[kMaxOutputLength];
float output_mean_squares_[kMaxOutputLength];
};
const size_t MovingMomentsTest::kMaxOutputLength;
void MovingMomentsTest::SetUp() {
moving_moments_.reset(new MovingMoments(kMovingMomentsBufferLength));
}
void MovingMomentsTest::CalculateMomentsAndVerify(
const float* input, size_t input_length,
const float* expected_mean,
const float* expected_mean_squares) {
ASSERT_LE(input_length, kMaxOutputLength);
moving_moments_->CalculateMoments(input,
input_length,
output_mean_,
output_mean_squares_);
  for (size_t i = 0; i < input_length; ++i) {
EXPECT_NEAR(expected_mean[i], output_mean_[i], kTolerance);
EXPECT_NEAR(expected_mean_squares[i], output_mean_squares_[i], kTolerance);
}
}
TEST_F(MovingMomentsTest, CorrectMomentsOfAnAllZerosBuffer) {
const float kInput[] = {0.f, 0.f, 0.f, 0.f, 0.f};
const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]);
const float expected_mean[kInputLength] = {0.f, 0.f, 0.f, 0.f, 0.f};
const float expected_mean_squares[kInputLength] = {0.f, 0.f, 0.f, 0.f, 0.f};
CalculateMomentsAndVerify(kInput, kInputLength, expected_mean,
expected_mean_squares);
}
TEST_F(MovingMomentsTest, CorrectMomentsOfAConstantBuffer) {
const float kInput[] = {5.f, 5.f, 5.f, 5.f, 5.f, 5.f, 5.f, 5.f, 5.f, 5.f};
const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]);
const float expected_mean[kInputLength] =
{1.f, 2.f, 3.f, 4.f, 5.f, 5.f, 5.f, 5.f, 5.f, 5.f};
const float expected_mean_squares[kInputLength] =
{5.f, 10.f, 15.f, 20.f, 25.f, 25.f, 25.f, 25.f, 25.f, 25.f};
CalculateMomentsAndVerify(kInput, kInputLength, expected_mean,
expected_mean_squares);
}
TEST_F(MovingMomentsTest, CorrectMomentsOfAnIncreasingBuffer) {
const float kInput[] = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f};
const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]);
const float expected_mean[kInputLength] =
{0.2f, 0.6f, 1.2f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f};
const float expected_mean_squares[kInputLength] =
{0.2f, 1.f, 2.8f, 6.f, 11.f, 18.f, 27.f, 38.f, 51.f};
CalculateMomentsAndVerify(kInput, kInputLength, expected_mean,
expected_mean_squares);
}
TEST_F(MovingMomentsTest, CorrectMomentsOfADecreasingBuffer) {
const float kInput[] =
{-1.f, -2.f, -3.f, -4.f, -5.f, -6.f, -7.f, -8.f, -9.f};
const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]);
const float expected_mean[kInputLength] =
{-0.2f, -0.6f, -1.2f, -2.f, -3.f, -4.f, -5.f, -6.f, -7.f};
const float expected_mean_squares[kInputLength] =
{0.2f, 1.f, 2.8f, 6.f, 11.f, 18.f, 27.f, 38.f, 51.f};
CalculateMomentsAndVerify(kInput, kInputLength, expected_mean,
expected_mean_squares);
}
TEST_F(MovingMomentsTest, CorrectMomentsOfAZeroMeanSequence) {
const size_t kMovingMomentsBufferLength = 4;
moving_moments_.reset(new MovingMoments(kMovingMomentsBufferLength));
const float kInput[] =
{1.f, -1.f, 1.f, -1.f, 1.f, -1.f, 1.f, -1.f, 1.f, -1.f};
const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]);
const float expected_mean[kInputLength] =
{0.25f, 0.f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f};
const float expected_mean_squares[kInputLength] =
{0.25f, 0.5f, 0.75f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f};
CalculateMomentsAndVerify(kInput, kInputLength, expected_mean,
expected_mean_squares);
}
TEST_F(MovingMomentsTest, CorrectMomentsOfAnArbitraryBuffer) {
const float kInput[] =
{0.2f, 0.3f, 0.5f, 0.7f, 0.11f, 0.13f, 0.17f, 0.19f, 0.23f};
const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]);
const float expected_mean[kInputLength] =
{0.04f, 0.1f, 0.2f, 0.34f, 0.362f, 0.348f, 0.322f, 0.26f, 0.166f};
const float expected_mean_squares[kInputLength] =
{0.008f, 0.026f, 0.076f, 0.174f, 0.1764f, 0.1718f, 0.1596f, 0.1168f,
0.0294f};
CalculateMomentsAndVerify(kInput, kInputLength, expected_mean,
expected_mean_squares);
}
TEST_F(MovingMomentsTest, MultipleCalculateMomentsCalls) {
const float kInputFirstCall[] =
{0.2f, 0.3f, 0.5f, 0.7f, 0.11f, 0.13f, 0.17f, 0.19f, 0.23f};
const size_t kInputFirstCallLength = sizeof(kInputFirstCall) /
sizeof(kInputFirstCall[0]);
const float kInputSecondCall[] = {0.29f, 0.31f};
const size_t kInputSecondCallLength = sizeof(kInputSecondCall) /
sizeof(kInputSecondCall[0]);
const float kInputThirdCall[] = {0.37f, 0.41f, 0.43f, 0.47f};
const size_t kInputThirdCallLength = sizeof(kInputThirdCall) /
sizeof(kInputThirdCall[0]);
const float expected_mean_first_call[kInputFirstCallLength] =
{0.04f, 0.1f, 0.2f, 0.34f, 0.362f, 0.348f, 0.322f, 0.26f, 0.166f};
const float expected_mean_squares_first_call[kInputFirstCallLength] =
{0.008f, 0.026f, 0.076f, 0.174f, 0.1764f, 0.1718f, 0.1596f, 0.1168f,
0.0294f};
const float expected_mean_second_call[kInputSecondCallLength] =
{0.202f, 0.238f};
const float expected_mean_squares_second_call[kInputSecondCallLength] =
{0.0438f, 0.0596f};
const float expected_mean_third_call[kInputThirdCallLength] =
{0.278f, 0.322f, 0.362f, 0.398f};
const float expected_mean_squares_third_call[kInputThirdCallLength] =
{0.0812f, 0.1076f, 0.134f, 0.1614f};
CalculateMomentsAndVerify(kInputFirstCall, kInputFirstCallLength,
expected_mean_first_call, expected_mean_squares_first_call);
CalculateMomentsAndVerify(kInputSecondCall, kInputSecondCallLength,
expected_mean_second_call, expected_mean_squares_second_call);
CalculateMomentsAndVerify(kInputThirdCall, kInputThirdCallLength,
expected_mean_third_call, expected_mean_squares_third_call);
}
TEST_F(MovingMomentsTest,
VerifySampleBasedVsBlockBasedCalculation) {
const float kInput[] =
{0.2f, 0.3f, 0.5f, 0.7f, 0.11f, 0.13f, 0.17f, 0.19f, 0.23f};
const size_t kInputLength = sizeof(kInput) / sizeof(kInput[0]);
float output_mean_block_based[kInputLength];
float output_mean_squares_block_based[kInputLength];
float output_mean_sample_based;
float output_mean_squares_sample_based;
moving_moments_->CalculateMoments(
kInput, kInputLength, output_mean_block_based,
output_mean_squares_block_based);
moving_moments_.reset(new MovingMoments(kMovingMomentsBufferLength));
for (size_t i = 0; i < kInputLength; ++i) {
moving_moments_->CalculateMoments(
&kInput[i], 1, &output_mean_sample_based,
&output_mean_squares_sample_based);
EXPECT_FLOAT_EQ(output_mean_block_based[i], output_mean_sample_based);
EXPECT_FLOAT_EQ(output_mean_squares_block_based[i],
output_mean_squares_sample_based);
}
}
} // namespace webrtc


@ -0,0 +1,12 @@
function [] = plotDetection(PCMfile, DATfile, fs, chunkSize)
%[] = plotDetection(PCMfile, DATfile, fs, chunkSize)
%
%Plots the signal alongside the detection values.
%
%PCMfile: The file of the input signal in PCM format.
%DATfile: The file containing the detection values in binary float format.
%fs: The sample rate of the signal in Hertz.
%chunkSize: The chunk size used to compute the detection values in seconds.
[x, tx] = readPCM(PCMfile, fs);
[d, td] = readDetection(DATfile, fs, chunkSize);
plot(tx, x, td, d);


@ -0,0 +1,16 @@
function [d, t] = readDetection(file, fs, chunkSize)
%[d, t] = readDetection(file, fs, chunkSize)
%
%Reads a detection signal from a DAT file.
%
%d: The detection signal.
%t: The respective time vector.
%
%file: The DAT file where the detection signal is stored in float format.
%fs: The signal sample rate in Hertz.
%chunkSize: The chunk size used for the detection in seconds.
fid = fopen(file);
d = fread(fid, inf, 'float');
fclose(fid);
t = 0:(1 / fs):(length(d) * chunkSize - 1 / fs);
d = d(floor(t / chunkSize) + 1);


@ -0,0 +1,16 @@
function [x, t] = readPCM(file, fs)
%[x, t] = readPCM(file, fs)
%
%Reads a signal from a PCM file.
%
%x: The read signal after normalization.
%t: The respective time vector.
%
%file: The PCM file where the signal is stored in int16 format.
%fs: The signal sample rate in Hertz.
fid = fopen(file);
x = fread(fid, inf, 'int16');
fclose(fid);
x = x - mean(x);
x = x / max(abs(x));
t = 0:(1 / fs):((length(x) - 1) / fs);


@ -0,0 +1,173 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/transient/transient_detector.h"
#include <assert.h>
#include <float.h>
#include <math.h>
#include <string.h>
#include "webrtc/modules/audio_processing/transient/common.h"
#include "webrtc/modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h"
#include "webrtc/modules/audio_processing/transient/moving_moments.h"
#include "webrtc/modules/audio_processing/transient/wpd_tree.h"
namespace webrtc {
static const int kTransientLengthMs = 30;
static const int kChunksAtStartupLeftToDelete =
kTransientLengthMs / ts::kChunkSizeMs;
static const float kDetectThreshold = 16.f;
TransientDetector::TransientDetector(int sample_rate_hz)
: samples_per_chunk_(sample_rate_hz * ts::kChunkSizeMs / 1000),
last_first_moment_(),
last_second_moment_(),
chunks_at_startup_left_to_delete_(kChunksAtStartupLeftToDelete),
reference_energy_(1.f),
using_reference_(false) {
assert(sample_rate_hz == ts::kSampleRate8kHz ||
sample_rate_hz == ts::kSampleRate16kHz ||
sample_rate_hz == ts::kSampleRate32kHz ||
sample_rate_hz == ts::kSampleRate48kHz);
int samples_per_transient = sample_rate_hz * kTransientLengthMs / 1000;
// Adjustment to avoid data loss while downsampling, making
// |samples_per_chunk_| and |samples_per_transient| always divisible by
// |kLeaves|.
samples_per_chunk_ -= samples_per_chunk_ % kLeaves;
samples_per_transient -= samples_per_transient % kLeaves;
tree_leaves_data_length_ = samples_per_chunk_ / kLeaves;
wpd_tree_.reset(new WPDTree(samples_per_chunk_,
kDaubechies8HighPassCoefficients,
kDaubechies8LowPassCoefficients,
kDaubechies8CoefficientsLength,
kLevels));
for (size_t i = 0; i < kLeaves; ++i) {
moving_moments_[i].reset(
new MovingMoments(samples_per_transient / kLeaves));
}
first_moments_.reset(new float[tree_leaves_data_length_]);
second_moments_.reset(new float[tree_leaves_data_length_]);
for (int i = 0; i < kChunksAtStartupLeftToDelete; ++i) {
previous_results_.push_back(0.f);
}
}
TransientDetector::~TransientDetector() {}
float TransientDetector::Detect(const float* data,
size_t data_length,
const float* reference_data,
size_t reference_length) {
assert(data && data_length == samples_per_chunk_);
// TODO(aluebs): Check if these errors can logically happen and if not assert
// on them.
if (wpd_tree_->Update(data, samples_per_chunk_) != 0) {
return -1.f;
}
float result = 0.f;
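  // For every leaf, accumulate the squared deviation of each sample from the
  // delayed moving mean, normalized by the delayed moving mean square
  // (FLT_MIN guards against division by zero).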
for (size_t i = 0; i < kLeaves; ++i) {
WPDNode* leaf = wpd_tree_->NodeAt(kLevels, i);
moving_moments_[i]->CalculateMoments(leaf->data(),
tree_leaves_data_length_,
first_moments_.get(),
second_moments_.get());
    // Add the delayed value (using the moments saved from the previous call
    // to Detect()).
float unbiased_data = leaf->data()[0] - last_first_moment_[i];
result +=
unbiased_data * unbiased_data / (last_second_moment_[i] + FLT_MIN);
// Add new values.
for (size_t j = 1; j < tree_leaves_data_length_; ++j) {
unbiased_data = leaf->data()[j] - first_moments_[j - 1];
result +=
unbiased_data * unbiased_data / (second_moments_[j - 1] + FLT_MIN);
}
last_first_moment_[i] = first_moments_[tree_leaves_data_length_ - 1];
last_second_moment_[i] = second_moments_[tree_leaves_data_length_ - 1];
}
result /= tree_leaves_data_length_;
result *= ReferenceDetectionValue(reference_data, reference_length);
if (chunks_at_startup_left_to_delete_ > 0) {
chunks_at_startup_left_to_delete_--;
result = 0.f;
}
if (result >= kDetectThreshold) {
result = 1.f;
} else {
    // Get a proportional value.
    // The proportion is achieved with a squared raised-cosine function with
    // domain [0, kDetectThreshold) and range [0, 1); it is always increasing.
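    // Equivalently, for an input x in [0, kDetectThreshold) this computes
    // sin^4(pi * x / (2 * kDetectThreshold)).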
const float horizontal_scaling = ts::kPi / kDetectThreshold;
const float kHorizontalShift = ts::kPi;
const float kVerticalScaling = 0.5f;
const float kVerticalShift = 1.f;
result = (cos(result * horizontal_scaling + kHorizontalShift)
+ kVerticalShift) * kVerticalScaling;
result *= result;
}
previous_results_.pop_front();
previous_results_.push_back(result);
  // In the current implementation we return the max of the current result and
  // the previous results, so the high results have a width equal to
  // |transient_length|.
return *std::max_element(previous_results_.begin(), previous_results_.end());
}
// Computes the energy of the reference chunk and compares it with the smoothed
// long-term reference energy. A sigmoid maps the energy ratio to the [0, 1]
// range; this value is multiplied by the detection result to avoid false
// positives when the reference signal is quiet.
float TransientDetector::ReferenceDetectionValue(const float* data,
size_t length) {
if (data == NULL) {
using_reference_ = false;
return 1.f;
}
static const float kEnergyRatioThreshold = 0.2f;
static const float kReferenceNonLinearity = 20.f;
static const float kMemory = 0.99f;
float reference_energy = 0.f;
for (size_t i = 1; i < length; ++i) {
reference_energy += data[i] * data[i];
}
if (reference_energy == 0.f) {
using_reference_ = false;
return 1.f;
}
assert(reference_energy_ != 0);
float result = 1.f / (1.f + exp(kReferenceNonLinearity *
(kEnergyRatioThreshold -
reference_energy / reference_energy_)));
reference_energy_ =
kMemory * reference_energy_ + (1.f - kMemory) * reference_energy;
using_reference_ = true;
return result;
}
} // namespace webrtc


@ -0,0 +1,87 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_DETECTOR_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_DETECTOR_H_
#include <deque>
#include "webrtc/modules/audio_processing/transient/moving_moments.h"
#include "webrtc/modules/audio_processing/transient/wpd_tree.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
namespace webrtc {
// This is an implementation of the transient detector described in "Causal
// Wavelet based transient detector".
// Calculates the log-likelihood of a transient to happen on a signal at any
// given time based on the previous samples; it uses a WPD tree to analyze the
// signal. It preserves its state, so it can be multiple-called.
class TransientDetector {
public:
  // TODO(chadan): The only supported wavelet is Daubechies 8 using a WPD tree
  // of 3 levels. When needed, add an overloaded constructor to allow different
  // wavelets and tree depths.
// Creates a wavelet based transient detector.
  explicit TransientDetector(int sample_rate_hz);
~TransientDetector();
// Calculates the log-likelihood of the existence of a transient in |data|.
// |data_length| has to be equal to |samples_per_chunk_|.
  // Returns a value between 0 and 1, as a non-linear representation of this
  // likelihood.
// Returns a negative value on error.
float Detect(const float* data,
size_t data_length,
const float* reference_data,
size_t reference_length);
bool using_reference() { return using_reference_; }
private:
float ReferenceDetectionValue(const float* data, size_t length);
static const size_t kLevels = 3;
static const size_t kLeaves = 1 << kLevels;
size_t samples_per_chunk_;
scoped_ptr<WPDTree> wpd_tree_;
size_t tree_leaves_data_length_;
// A MovingMoments object is needed for each leaf in the WPD tree.
scoped_ptr<MovingMoments> moving_moments_[kLeaves];
scoped_ptr<float[]> first_moments_;
scoped_ptr<float[]> second_moments_;
// Stores the last calculated moments from the previous detection.
float last_first_moment_[kLeaves];
float last_second_moment_[kLeaves];
  // We keep track of the results from the previous chunks, so they can be used
  // to effectively give results with a width of |transient_length|.
std::deque<float> previous_results_;
  // Number of chunks at the beginning of the detection for which only zeros
  // are returned. This helps to avoid infs and nans due to the lack of
  // information.
int chunks_at_startup_left_to_delete_;
float reference_energy_;
bool using_reference_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_DETECTOR_H_
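For reference, a minimal usage sketch (illustrative only, not part of this
change; the 160-sample chunk assumes ts::kChunkSizeMs == 10 at 16 kHz, and the
function name is a placeholder):

#include "webrtc/modules/audio_processing/transient/transient_detector.h"

void TransientDetectorExample(const float* chunk) {
  webrtc::TransientDetector detector(16000);
  // |chunk| must hold one 10 ms chunk (160 samples at 16 kHz). With no
  // reference signal available, pass NULL; the returned value is the
  // non-linear transient likelihood in [0, 1], or negative on error.
  float likelihood = detector.Detect(chunk, 160, NULL, 0);
  (void)likelihood;
}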


@ -0,0 +1,104 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/transient/transient_detector.h"
#include <sstream>
#include <string>
#include "testing/gtest/include/gtest/gtest.h"
#include "webrtc/modules/audio_processing/transient/common.h"
#include "webrtc/modules/audio_processing/transient/file_utils.h"
#include "webrtc/system_wrappers/interface/file_wrapper.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
#include "webrtc/test/testsupport/fileutils.h"
#include "webrtc/typedefs.h"
namespace webrtc {
static const int kSampleRatesHz[] = {ts::kSampleRate8kHz,
ts::kSampleRate16kHz,
ts::kSampleRate32kHz,
ts::kSampleRate48kHz};
static const size_t kNumberOfSampleRates =
sizeof(kSampleRatesHz) / sizeof(*kSampleRatesHz);
// This test checks the correctness of the transient detector.
// It compares the results with the ones stored in the detect files in the
// directory resources/audio_processing/transient/.
// The files contain all the results in double precision (little endian).
// The audio files used for the different sample rates are stored in the same
// directory.
TEST(TransientDetectorTest, CorrectnessBasedOnFiles) {
for (size_t i = 0; i < kNumberOfSampleRates; ++i) {
int sample_rate_hz = kSampleRatesHz[i];
// Prepare detect file.
std::stringstream detect_file_name;
detect_file_name << "audio_processing/transient/detect"
<< (sample_rate_hz / 1000) << "kHz";
scoped_ptr<FileWrapper> detect_file(FileWrapper::Create());
detect_file->OpenFile(
test::ResourcePath(detect_file_name.str(), "dat").c_str(),
true, // Read only.
false, // No loop.
false); // No text.
bool file_opened = detect_file->Open();
ASSERT_TRUE(file_opened) << "File could not be opened.\n"
<< detect_file_name.str().c_str();
// Prepare audio file.
std::stringstream audio_file_name;
audio_file_name << "audio_processing/transient/audio"
<< (sample_rate_hz / 1000) << "kHz";
scoped_ptr<FileWrapper> audio_file(FileWrapper::Create());
audio_file->OpenFile(
test::ResourcePath(audio_file_name.str(), "pcm").c_str(),
true, // Read only.
false, // No loop.
false); // No text.
// Create detector.
TransientDetector detector(sample_rate_hz);
const size_t buffer_length = sample_rate_hz * ts::kChunkSizeMs / 1000;
scoped_ptr<float[]> buffer(new float[buffer_length]);
const float kTolerance = 0.01f;
size_t frames_read = 0;
while (ReadInt16FromFileToFloatBuffer(audio_file.get(),
buffer_length,
buffer.get()) == buffer_length) {
++frames_read;
float detector_value =
detector.Detect(buffer.get(), buffer_length, NULL, 0);
double file_value;
ASSERT_EQ(1u, ReadDoubleBufferFromFile(detect_file.get(), 1, &file_value))
<< "Detect test file is malformed.\n";
// Compare results with data from the matlab test file.
EXPECT_NEAR(file_value, detector_value, kTolerance) << "Frame: "
<< frames_read;
}
detect_file->CloseFile();
audio_file->CloseFile();
}
}
} // namespace webrtc


@ -0,0 +1,250 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/transient/transient_suppressor.h"
#include <stdlib.h>
#include <stdio.h>
#include <string>
#include "gflags/gflags.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "webrtc/common_audio/include/audio_util.h"
#include "webrtc/modules/audio_processing/agc/agc.h"
#include "webrtc/modules/interface/module_common_types.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
#include "webrtc/test/testsupport/fileutils.h"
#include "webrtc/typedefs.h"
DEFINE_string(in_file_name, "", "PCM file that contains the signal.");
DEFINE_string(detection_file_name,
"",
"PCM file that contains the detection signal.");
DEFINE_string(reference_file_name,
"",
"PCM file that contains the reference signal.");
static bool ValidatePositiveInt(const char* flagname, int32_t value) {
if (value <= 0) {
printf("%s must be a positive integer.\n", flagname);
return false;
}
return true;
}
DEFINE_int32(chunk_size_ms,
10,
"Time between each chunk of samples in milliseconds.");
static const bool chunk_size_ms_dummy =
google::RegisterFlagValidator(&FLAGS_chunk_size_ms, &ValidatePositiveInt);
DEFINE_int32(sample_rate_hz,
16000,
"Sampling frequency of the signal in Hertz.");
static const bool sample_rate_hz_dummy =
google::RegisterFlagValidator(&FLAGS_sample_rate_hz, &ValidatePositiveInt);
DEFINE_int32(detection_rate_hz,
0,
"Sampling frequency of the detection signal in Hertz.");
DEFINE_int32(num_channels, 1, "Number of channels.");
static const bool num_channels_dummy =
google::RegisterFlagValidator(&FLAGS_num_channels, &ValidatePositiveInt);
namespace webrtc {
const char kUsage[] =
"\nDetects and suppresses transients from file.\n\n"
"This application loads the signal from the in_file_name with a specific\n"
"num_channels and sample_rate_hz, the detection signal from the\n"
"detection_file_name with a specific detection_rate_hz, and the reference\n"
"signal from the reference_file_name with sample_rate_hz, divides them\n"
"into chunk_size_ms blocks, computes its voice value and depending on the\n"
"voice_threshold does the respective restoration. You can always get the\n"
"all-voiced or all-unvoiced cases by setting the voice_threshold to 0 or\n"
"1 respectively.\n\n";
// Read next buffers from the test files (signed 16-bit host-endian PCM
// format). audio_buffer has int16 samples, detection_buffer has float samples
// with range [-32768,32767], and reference_buffer has float samples with range
// [-1,1]. Return true iff all the buffers were filled completely.
bool ReadBuffers(FILE* in_file,
size_t audio_buffer_size,
int num_channels,
int16_t* audio_buffer,
FILE* detection_file,
size_t detection_buffer_size,
float* detection_buffer,
FILE* reference_file,
float* reference_buffer) {
scoped_ptr<int16_t[]> tmpbuf;
int16_t* read_ptr = audio_buffer;
if (num_channels > 1) {
tmpbuf.reset(new int16_t[num_channels * audio_buffer_size]);
read_ptr = tmpbuf.get();
}
if (fread(read_ptr,
sizeof(*read_ptr),
num_channels * audio_buffer_size,
in_file) != num_channels * audio_buffer_size) {
return false;
}
// De-interleave.
if (num_channels > 1) {
for (int i = 0; i < num_channels; ++i) {
for (size_t j = 0; j < audio_buffer_size; ++j) {
audio_buffer[i * audio_buffer_size + j] =
read_ptr[i + j * num_channels];
}
}
}
if (detection_file) {
scoped_ptr<int16_t[]> ibuf(new int16_t[detection_buffer_size]);
if (fread(ibuf.get(), sizeof(ibuf[0]), detection_buffer_size,
detection_file) != detection_buffer_size)
return false;
for (size_t i = 0; i < detection_buffer_size; ++i)
detection_buffer[i] = ibuf[i];
}
if (reference_file) {
scoped_ptr<int16_t[]> ibuf(new int16_t[audio_buffer_size]);
if (fread(ibuf.get(), sizeof(ibuf[0]), audio_buffer_size, reference_file)
!= audio_buffer_size)
return false;
S16ToFloat(ibuf.get(), audio_buffer_size, reference_buffer);
}
return true;
}
// Write a number of samples to an open signed 16-bit host-endian PCM file.
static void WritePCM(FILE* f,
size_t num_samples,
int num_channels,
const float* buffer) {
scoped_ptr<int16_t[]> ibuf(new int16_t[num_channels * num_samples]);
// Interleave.
for (int i = 0; i < num_channels; ++i) {
for (size_t j = 0; j < num_samples; ++j) {
ibuf[i + j * num_channels] = FloatS16ToS16(buffer[i * num_samples + j]);
}
}
fwrite(ibuf.get(), sizeof(ibuf[0]), num_channels * num_samples, f);
}
// This application tests the transient suppression by producing a processed
// PCM file, which has to be listened to in order to evaluate the
// performance.
// It takes an audio file and its voice gain information, and the suppressor
// processes it, writing the output to "suppressed_keystrokes.pcm".
void void_main() {
// TODO(aluebs): Remove all FileWrappers.
// Prepare the input file.
FILE* in_file = fopen(FLAGS_in_file_name.c_str(), "rb");
ASSERT_TRUE(in_file != NULL);
// Prepare the detection file.
FILE* detection_file = NULL;
if (FLAGS_detection_file_name != "") {
detection_file = fopen(FLAGS_detection_file_name.c_str(), "rb");
}
// Prepare the reference file.
FILE* reference_file = NULL;
if (FLAGS_reference_file_name != "") {
reference_file = fopen(FLAGS_reference_file_name.c_str(), "rb");
}
// Prepare the output file.
std::string out_file_name = test::OutputPath() + "suppressed_keystrokes.pcm";
FILE* out_file = fopen(out_file_name.c_str(), "wb");
ASSERT_TRUE(out_file != NULL);
int detection_rate_hz = FLAGS_detection_rate_hz;
if (detection_rate_hz == 0) {
detection_rate_hz = FLAGS_sample_rate_hz;
}
Agc agc;
TransientSuppressor suppressor;
suppressor.Initialize(
FLAGS_sample_rate_hz, detection_rate_hz, FLAGS_num_channels);
const size_t audio_buffer_size =
FLAGS_chunk_size_ms * FLAGS_sample_rate_hz / 1000;
const size_t detection_buffer_size =
FLAGS_chunk_size_ms * detection_rate_hz / 1000;
// int16 and float variants of the same data.
scoped_ptr<int16_t[]> audio_buffer_i(
new int16_t[FLAGS_num_channels * audio_buffer_size]);
scoped_ptr<float[]> audio_buffer_f(
new float[FLAGS_num_channels * audio_buffer_size]);
scoped_ptr<float[]> detection_buffer, reference_buffer;
if (detection_file)
detection_buffer.reset(new float[detection_buffer_size]);
if (reference_file)
reference_buffer.reset(new float[audio_buffer_size]);
while (ReadBuffers(in_file,
audio_buffer_size,
FLAGS_num_channels,
audio_buffer_i.get(),
detection_file,
detection_buffer_size,
detection_buffer.get(),
reference_file,
reference_buffer.get())) {
ASSERT_EQ(0,
agc.Process(audio_buffer_i.get(),
static_cast<int>(audio_buffer_size),
FLAGS_sample_rate_hz))
<< "The AGC could not process the frame";
for (size_t i = 0; i < FLAGS_num_channels * audio_buffer_size; ++i) {
audio_buffer_f[i] = audio_buffer_i[i];
}
ASSERT_EQ(0,
suppressor.Suppress(audio_buffer_f.get(),
audio_buffer_size,
FLAGS_num_channels,
detection_buffer.get(),
detection_buffer_size,
reference_buffer.get(),
audio_buffer_size,
agc.voice_probability(),
true))
<< "The transient suppressor could not suppress the frame";
// Write result to out file.
WritePCM(
out_file, audio_buffer_size, FLAGS_num_channels, audio_buffer_f.get());
}
fclose(in_file);
if (detection_file) {
fclose(detection_file);
}
if (reference_file) {
fclose(reference_file);
}
fclose(out_file);
}
} // namespace webrtc
int main(int argc, char* argv[]) {
google::SetUsageMessage(webrtc::kUsage);
google::ParseCommandLineFlags(&argc, &argv, true);
webrtc::void_main();
return 0;
}


@ -0,0 +1,424 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/transient/transient_suppressor.h"
#include <math.h>
#include <string.h>
#include <cmath>
#include <complex>
#include <deque>
#include <set>
#include "webrtc/common_audio/include/audio_util.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_processing/transient/common.h"
#include "webrtc/modules/audio_processing/transient/transient_detector.h"
#include "webrtc/modules/audio_processing/ns/windows_private.h"
extern "C" {
#include "webrtc/modules/audio_processing/utility/fft4g.h"
}
#include "webrtc/system_wrappers/interface/logging.h"
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
#include "webrtc/typedefs.h"
namespace webrtc {
static const float kMeanIIRCoefficient = 0.5f;
static const float kVoiceThreshold = 0.02f;
// TODO(aluebs): Check if these values work also for 48kHz.
static const size_t kMinVoiceBin = 3;
static const size_t kMaxVoiceBin = 60;
namespace {
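// Approximates the magnitude of a complex number a + bi by the L1 norm
// |a| + |b|, which is cheaper to compute than the Euclidean norm.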
float ComplexMagnitude(float a, float b) {
return std::abs(a) + std::abs(b);
}
}  // namespace
TransientSuppressor::TransientSuppressor()
: data_length_(0),
detection_length_(0),
analysis_length_(0),
buffer_delay_(0),
complex_analysis_length_(0),
num_channels_(0),
window_(NULL),
detector_smoothed_(0.f),
keypress_counter_(0),
chunks_since_keypress_(0),
detection_enabled_(false),
suppression_enabled_(false),
use_hard_restoration_(false),
chunks_since_voice_change_(0),
seed_(182),
using_reference_(false) {
}
TransientSuppressor::~TransientSuppressor() {}
int TransientSuppressor::Initialize(int sample_rate_hz,
int detection_rate_hz,
int num_channels) {
switch (sample_rate_hz) {
case ts::kSampleRate8kHz:
analysis_length_ = 128u;
window_ = kBlocks80w128;
break;
case ts::kSampleRate16kHz:
analysis_length_ = 256u;
window_ = kBlocks160w256;
break;
case ts::kSampleRate32kHz:
analysis_length_ = 512u;
window_ = kBlocks320w512;
break;
case ts::kSampleRate48kHz:
analysis_length_ = 1024u;
window_ = kBlocks480w1024;
break;
default:
return -1;
}
if (detection_rate_hz != ts::kSampleRate8kHz &&
detection_rate_hz != ts::kSampleRate16kHz &&
detection_rate_hz != ts::kSampleRate32kHz &&
detection_rate_hz != ts::kSampleRate48kHz) {
return -1;
}
if (num_channels <= 0) {
return -1;
}
detector_.reset(new TransientDetector(detection_rate_hz));
data_length_ = sample_rate_hz * ts::kChunkSizeMs / 1000;
if (data_length_ > analysis_length_) {
assert(false);
return -1;
}
buffer_delay_ = analysis_length_ - data_length_;
complex_analysis_length_ = analysis_length_ / 2 + 1;
assert(complex_analysis_length_ >= kMaxVoiceBin);
num_channels_ = num_channels;
in_buffer_.reset(new float[analysis_length_ * num_channels_]);
memset(in_buffer_.get(),
0,
analysis_length_ * num_channels_ * sizeof(in_buffer_[0]));
detection_length_ = detection_rate_hz * ts::kChunkSizeMs / 1000;
detection_buffer_.reset(new float[detection_length_]);
memset(detection_buffer_.get(),
0,
detection_length_ * sizeof(detection_buffer_[0]));
out_buffer_.reset(new float[analysis_length_ * num_channels_]);
memset(out_buffer_.get(),
0,
analysis_length_ * num_channels_ * sizeof(out_buffer_[0]));
// ip[0] must be zero to trigger initialization using rdft().
size_t ip_length = 2 + sqrtf(analysis_length_);
ip_.reset(new int[ip_length]());
memset(ip_.get(), 0, ip_length * sizeof(ip_[0]));
wfft_.reset(new float[complex_analysis_length_ - 1]);
memset(wfft_.get(), 0, (complex_analysis_length_ - 1) * sizeof(wfft_[0]));
spectral_mean_.reset(new float[complex_analysis_length_ * num_channels_]);
memset(spectral_mean_.get(),
0,
complex_analysis_length_ * num_channels_ * sizeof(spectral_mean_[0]));
fft_buffer_.reset(new float[analysis_length_ + 2]);
memset(fft_buffer_.get(), 0, (analysis_length_ + 2) * sizeof(fft_buffer_[0]));
magnitudes_.reset(new float[complex_analysis_length_]);
memset(magnitudes_.get(),
0,
complex_analysis_length_ * sizeof(magnitudes_[0]));
mean_factor_.reset(new float[complex_analysis_length_]);
static const float kFactorHeight = 10.f;
static const float kLowSlope = 1.f;
static const float kHighSlope = 0.3f;
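  // |mean_factor_| is a double sigmoid over the frequency bins: close to zero
  // inside the voice band [kMinVoiceBin, kMaxVoiceBin] and approaching
  // kFactorHeight outside it, so SoftRestoration() leaves bins in the voice
  // band mostly untouched.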
for (size_t i = 0; i < complex_analysis_length_; ++i) {
mean_factor_[i] =
kFactorHeight /
(1.f + exp(kLowSlope * static_cast<int>(i - kMinVoiceBin))) +
kFactorHeight /
(1.f + exp(kHighSlope * static_cast<int>(kMaxVoiceBin - i)));
}
detector_smoothed_ = 0.f;
keypress_counter_ = 0;
chunks_since_keypress_ = 0;
detection_enabled_ = false;
suppression_enabled_ = false;
use_hard_restoration_ = false;
chunks_since_voice_change_ = 0;
seed_ = 182;
using_reference_ = false;
return 0;
}
int TransientSuppressor::Suppress(float* data,
size_t data_length,
int num_channels,
const float* detection_data,
size_t detection_length,
const float* reference_data,
size_t reference_length,
float voice_probability,
bool key_pressed) {
if (!data || data_length != data_length_ || num_channels != num_channels_ ||
detection_length != detection_length_ || voice_probability < 0 ||
voice_probability > 1) {
return -1;
}
UpdateKeypress(key_pressed);
UpdateBuffers(data);
int result = 0;
if (detection_enabled_) {
UpdateRestoration(voice_probability);
if (!detection_data) {
// Use the input data of the first channel if special detection data is
// not supplied.
detection_data = &in_buffer_[buffer_delay_];
}
float detector_result = detector_->Detect(
detection_data, detection_length, reference_data, reference_length);
if (detector_result < 0) {
return -1;
}
using_reference_ = detector_->using_reference();
    // |detector_smoothed_| follows |detector_result| while the latter is
    // increasing, but has an exponentially decaying tail so it can suppress
    // the ringing of keyclicks.
float smooth_factor = using_reference_ ? 0.6 : 0.1;
detector_smoothed_ = detector_result >= detector_smoothed_
? detector_result
: smooth_factor * detector_smoothed_ +
(1 - smooth_factor) * detector_result;
for (int i = 0; i < num_channels_; ++i) {
Suppress(&in_buffer_[i * analysis_length_],
&spectral_mean_[i * complex_analysis_length_],
&out_buffer_[i * analysis_length_]);
}
}
// If the suppression isn't enabled, we use the in buffer to delay the signal
// appropriately. This also gives time for the out buffer to be refreshed with
// new data between detection and suppression getting enabled.
for (int i = 0; i < num_channels_; ++i) {
memcpy(&data[i * data_length_],
suppression_enabled_ ? &out_buffer_[i * analysis_length_]
: &in_buffer_[i * analysis_length_],
data_length_ * sizeof(*data));
}
return result;
}
// This should only be called when detection is enabled. UpdateBuffers() must
// have been called. At return, |out_buffer_| will be filled with the
// processed output.
void TransientSuppressor::Suppress(float* in_ptr,
float* spectral_mean,
float* out_ptr) {
// Go to frequency domain.
for (size_t i = 0; i < analysis_length_; ++i) {
// TODO(aluebs): Rename windows
fft_buffer_[i] = in_ptr[i] * window_[i];
}
WebRtc_rdft(analysis_length_, 1, fft_buffer_.get(), ip_.get(), wfft_.get());
// Since WebRtc_rdft puts R[n/2] in fft_buffer_[1], we move it to the end
// for convenience.
fft_buffer_[analysis_length_] = fft_buffer_[1];
fft_buffer_[analysis_length_ + 1] = 0.f;
fft_buffer_[1] = 0.f;
for (size_t i = 0; i < complex_analysis_length_; ++i) {
magnitudes_[i] = ComplexMagnitude(fft_buffer_[i * 2],
fft_buffer_[i * 2 + 1]);
}
// Restore audio if necessary.
if (suppression_enabled_) {
if (use_hard_restoration_) {
HardRestoration(spectral_mean);
} else {
SoftRestoration(spectral_mean);
}
}
// Update the spectral mean.
for (size_t i = 0; i < complex_analysis_length_; ++i) {
spectral_mean[i] = (1 - kMeanIIRCoefficient) * spectral_mean[i] +
kMeanIIRCoefficient * magnitudes_[i];
}
// Back to time domain.
// Put R[n/2] back in fft_buffer_[1].
fft_buffer_[1] = fft_buffer_[analysis_length_];
WebRtc_rdft(analysis_length_,
-1,
fft_buffer_.get(),
ip_.get(),
wfft_.get());
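  // Overlap-add the windowed inverse transform into the output buffer. The
  // 2 / N factor compensates for the scaling convention of the rdft inverse.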
const float fft_scaling = 2.f / analysis_length_;
for (size_t i = 0; i < analysis_length_; ++i) {
out_ptr[i] += fft_buffer_[i] * window_[i] * fft_scaling;
}
}
void TransientSuppressor::UpdateKeypress(bool key_pressed) {
const int kKeypressPenalty = 1000 / ts::kChunkSizeMs;
const int kIsTypingThreshold = 1000 / ts::kChunkSizeMs;
const int kChunksUntilNotTyping = 4000 / ts::kChunkSizeMs; // 4 seconds.
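  // Assuming ts::kChunkSizeMs == 10, each key-press adds a penalty of 100 and
  // the counter decays by 1 per chunk, so a single key-press never crosses
  // |kIsTypingThreshold|; roughly two key-presses within a second enable
  // suppression.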
if (key_pressed) {
keypress_counter_ += kKeypressPenalty;
chunks_since_keypress_ = 0;
detection_enabled_ = true;
}
keypress_counter_ = std::max(0, keypress_counter_ - 1);
if (keypress_counter_ > kIsTypingThreshold) {
if (!suppression_enabled_) {
LOG(LS_INFO) << "[ts] Transient suppression is now enabled.";
}
suppression_enabled_ = true;
keypress_counter_ = 0;
}
if (detection_enabled_ &&
++chunks_since_keypress_ > kChunksUntilNotTyping) {
if (suppression_enabled_) {
LOG(LS_INFO) << "[ts] Transient suppression is now disabled.";
}
detection_enabled_ = false;
suppression_enabled_ = false;
keypress_counter_ = 0;
}
}
void TransientSuppressor::UpdateRestoration(float voice_probability) {
const int kHardRestorationOffsetDelay = 3;
const int kHardRestorationOnsetDelay = 80;
bool not_voiced = voice_probability < kVoiceThreshold;
if (not_voiced == use_hard_restoration_) {
chunks_since_voice_change_ = 0;
} else {
++chunks_since_voice_change_;
if ((use_hard_restoration_ &&
chunks_since_voice_change_ > kHardRestorationOffsetDelay) ||
(!use_hard_restoration_ &&
chunks_since_voice_change_ > kHardRestorationOnsetDelay)) {
use_hard_restoration_ = not_voiced;
chunks_since_voice_change_ = 0;
}
}
}
// Shift buffers to make way for new data. Must be called after
// |detection_enabled_| is updated by UpdateKeypress().
void TransientSuppressor::UpdateBuffers(float* data) {
// TODO(aluebs): Change to ring buffer.
memmove(in_buffer_.get(),
&in_buffer_[data_length_],
(buffer_delay_ + (num_channels_ - 1) * analysis_length_) *
sizeof(in_buffer_[0]));
// Copy new chunk to buffer.
for (int i = 0; i < num_channels_; ++i) {
memcpy(&in_buffer_[buffer_delay_ + i * analysis_length_],
&data[i * data_length_],
data_length_ * sizeof(*data));
}
if (detection_enabled_) {
// Shift previous chunk in out buffer.
memmove(out_buffer_.get(),
&out_buffer_[data_length_],
(buffer_delay_ + (num_channels_ - 1) * analysis_length_) *
sizeof(out_buffer_[0]));
// Initialize new chunk in out buffer.
for (int i = 0; i < num_channels_; ++i) {
memset(&out_buffer_[buffer_delay_ + i * analysis_length_],
0,
data_length_ * sizeof(out_buffer_[0]));
}
}
}
// Restores the unvoiced signal if a click is present.
// Attenuates by a certain factor every peak in the |fft_buffer_| that exceeds
// the spectral mean. The attenuation depends on |detector_smoothed_|.
// If a restoration takes place, the |magnitudes_| are updated to the new value.
void TransientSuppressor::HardRestoration(float* spectral_mean) {
const float detector_result =
1.f - pow(1.f - detector_smoothed_, using_reference_ ? 200.f : 50.f);
// To restore, we get the peaks in the spectrum. If higher than the previous
// spectral mean we adjust them.
for (size_t i = 0; i < complex_analysis_length_; ++i) {
if (magnitudes_[i] > spectral_mean[i] && magnitudes_[i] > 0) {
// RandU() generates values on [0, int16::max()]
const float phase = 2 * ts::kPi * WebRtcSpl_RandU(&seed_) /
std::numeric_limits<int16_t>::max();
const float scaled_mean = detector_result * spectral_mean[i];
fft_buffer_[i * 2] = (1 - detector_result) * fft_buffer_[i * 2] +
scaled_mean * cosf(phase);
fft_buffer_[i * 2 + 1] = (1 - detector_result) * fft_buffer_[i * 2 + 1] +
scaled_mean * sinf(phase);
magnitudes_[i] = magnitudes_[i] -
detector_result * (magnitudes_[i] - spectral_mean[i]);
}
}
}
// Restores the voiced signal if a click is present.
// Attenuates by a certain factor every peak in the |fft_buffer_| that exceeds
// the spectral mean and that is lower than some function of the current block
// frequency mean. The attenuation depends on |detector_smoothed_|.
// If a restoration takes place, the |magnitudes_| are updated to the new value.
void TransientSuppressor::SoftRestoration(float* spectral_mean) {
// Get the spectral magnitude mean of the current block.
float block_frequency_mean = 0;
for (size_t i = kMinVoiceBin; i < kMaxVoiceBin; ++i) {
block_frequency_mean += magnitudes_[i];
}
block_frequency_mean /= (kMaxVoiceBin - kMinVoiceBin);
// To restore, we get the peaks in the spectrum. If higher than the
// previous spectral mean and lower than a factor of the block mean
// we adjust them. The factor is a double sigmoid that has a minimum in the
// voice frequency range (300Hz - 3kHz).
for (size_t i = 0; i < complex_analysis_length_; ++i) {
if (magnitudes_[i] > spectral_mean[i] && magnitudes_[i] > 0 &&
(using_reference_ ||
magnitudes_[i] < block_frequency_mean * mean_factor_[i])) {
const float new_magnitude =
magnitudes_[i] -
detector_smoothed_ * (magnitudes_[i] - spectral_mean[i]);
const float magnitude_ratio = new_magnitude / magnitudes_[i];
fft_buffer_[i * 2] *= magnitude_ratio;
fft_buffer_[i * 2 + 1] *= magnitude_ratio;
magnitudes_[i] = new_magnitude;
}
}
}
} // namespace webrtc


@ -0,0 +1,120 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
#include <deque>
#include <set>
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
#include "webrtc/test/testsupport/gtest_prod_util.h"
#include "webrtc/typedefs.h"
namespace webrtc {
class TransientDetector;
// Detects transients in an audio stream and suppresses them using a simple
// restoration algorithm that attenuates unexpected spikes in the spectrum.
class TransientSuppressor {
public:
TransientSuppressor();
~TransientSuppressor();
  int Initialize(int sample_rate_hz, int detection_rate_hz, int num_channels);
  // Processes a |data| chunk and returns it with keystrokes suppressed. The
  // float format is assumed to be int16 ranged. If there is more than one
  // channel, the chunks are concatenated one after the other in |data|.
  // |data_length| must be equal to |data_length_|.
  // |num_channels| must be equal to |num_channels_|.
  // A sub-band, ideally the highest, can be used as |detection_data|. If it
  // is NULL, |data| is used for the detection too. |detection_data| is always
  // assumed to be mono.
// If a reference signal (e.g. keyboard microphone) is available, it can be
// passed in as |reference_data|. It is assumed mono and must have the same
// length as |data|. NULL is accepted if unavailable.
// This suppressor performs better if voice information is available.
// |voice_probability| is the probability of voice being present in this chunk
// of audio. If voice information is not available, |voice_probability| must
// always be set to 1.
// |key_pressed| determines if a key was pressed on this audio chunk.
// Returns 0 on success and -1 otherwise.
int Suppress(float* data,
size_t data_length,
int num_channels,
const float* detection_data,
size_t detection_length,
const float* reference_data,
size_t reference_length,
float voice_probability,
bool key_pressed);
private:
FRIEND_TEST_ALL_PREFIXES(TransientSuppressorTest,
TypingDetectionLogicWorksAsExpectedForMono);
void Suppress(float* in_ptr, float* spectral_mean, float* out_ptr);
void UpdateKeypress(bool key_pressed);
void UpdateRestoration(float voice_probability);
void UpdateBuffers(float* data);
void HardRestoration(float* spectral_mean);
void SoftRestoration(float* spectral_mean);
scoped_ptr<TransientDetector> detector_;
size_t data_length_;
size_t detection_length_;
size_t analysis_length_;
size_t buffer_delay_;
size_t complex_analysis_length_;
int num_channels_;
// Input buffer where the original samples are stored.
scoped_ptr<float[]> in_buffer_;
scoped_ptr<float[]> detection_buffer_;
// Output buffer where the restored samples are stored.
scoped_ptr<float[]> out_buffer_;
// Arrays for fft.
scoped_ptr<int[]> ip_;
scoped_ptr<float[]> wfft_;
scoped_ptr<float[]> spectral_mean_;
// Stores the data for the fft.
scoped_ptr<float[]> fft_buffer_;
scoped_ptr<float[]> magnitudes_;
const float* window_;
scoped_ptr<float[]> mean_factor_;
float detector_smoothed_;
int keypress_counter_;
int chunks_since_keypress_;
bool detection_enabled_;
bool suppression_enabled_;
bool use_hard_restoration_;
int chunks_since_voice_change_;
uint32_t seed_;
bool using_reference_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
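
For reference, a minimal mono usage sketch of the Suppress() contract
documented above. The chunk size, sample rate, and the decision to pass NULL
for the detection and reference signals are illustrative assumptions, not
requirements taken from this change:

#include <vector>
#include "webrtc/modules/audio_processing/transient/transient_suppressor.h"

// Suppresses keystrokes in one 10 ms mono chunk (160 samples at 16 kHz).
// Passing NULL for |detection_data| makes the suppressor detect on |data|
// itself; NULL for |reference_data| means no keyboard microphone is available.
int SuppressMonoChunk(webrtc::TransientSuppressor* suppressor,
                      std::vector<float>* chunk,  // int16-ranged floats.
                      float voice_probability,    // Use 1.f if unknown.
                      bool key_pressed) {
  return suppressor->Suppress(&(*chunk)[0], chunk->size(),
                              1,     // num_channels
                              NULL,  // detection_data
                              0,     // detection_length
                              NULL,  // reference_data
                              0,     // reference_length
                              voice_probability, key_pressed);
}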


@ -0,0 +1,85 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/transient/transient_suppressor.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "webrtc/modules/audio_processing/transient/common.h"
namespace webrtc {
TEST(TransientSuppressorTest, TypingDetectionLogicWorksAsExpectedForMono) {
static const int kNumChannels = 1;
TransientSuppressor ts;
ts.Initialize(ts::kSampleRate16kHz, ts::kSampleRate16kHz, kNumChannels);
// Each key-press enables detection.
EXPECT_FALSE(ts.detection_enabled_);
ts.UpdateKeypress(true);
EXPECT_TRUE(ts.detection_enabled_);
// It takes four seconds without any key-press to disable the detection.
for (int time_ms = 0; time_ms < 3990; time_ms += ts::kChunkSizeMs) {
ts.UpdateKeypress(false);
EXPECT_TRUE(ts.detection_enabled_);
}
ts.UpdateKeypress(false);
EXPECT_FALSE(ts.detection_enabled_);
// Key-presses that are more than a second apart from each other don't enable
// suppression.
for (int i = 0; i < 100; ++i) {
EXPECT_FALSE(ts.suppression_enabled_);
ts.UpdateKeypress(true);
EXPECT_TRUE(ts.detection_enabled_);
EXPECT_FALSE(ts.suppression_enabled_);
for (int time_ms = 0; time_ms < 990; time_ms += ts::kChunkSizeMs) {
ts.UpdateKeypress(false);
EXPECT_TRUE(ts.detection_enabled_);
EXPECT_FALSE(ts.suppression_enabled_);
}
ts.UpdateKeypress(false);
}
// Two consecutive key-presses are enough to enable the suppression.
ts.UpdateKeypress(true);
EXPECT_FALSE(ts.suppression_enabled_);
ts.UpdateKeypress(true);
EXPECT_TRUE(ts.suppression_enabled_);
// Key-presses that are less than a second apart from each other don't disable
// either detection or suppression.
for (int i = 0; i < 100; ++i) {
for (int time_ms = 0; time_ms < 1000; time_ms += ts::kChunkSizeMs) {
ts.UpdateKeypress(false);
EXPECT_TRUE(ts.detection_enabled_);
EXPECT_TRUE(ts.suppression_enabled_);
}
ts.UpdateKeypress(true);
EXPECT_TRUE(ts.detection_enabled_);
EXPECT_TRUE(ts.suppression_enabled_);
}
// It takes four seconds without any key-press to disable the detection and
// suppression.
for (int time_ms = 0; time_ms < 3990; time_ms += ts::kChunkSizeMs) {
ts.UpdateKeypress(false);
EXPECT_TRUE(ts.detection_enabled_);
EXPECT_TRUE(ts.suppression_enabled_);
}
for (int time_ms = 0; time_ms < 1000; time_ms += ts::kChunkSizeMs) {
ts.UpdateKeypress(false);
EXPECT_FALSE(ts.detection_enabled_);
EXPECT_FALSE(ts.suppression_enabled_);
}
}
} // namespace webrtc
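
The expectations above pin down the typing-detection behavior without showing
UpdateKeypress() itself (it is defined in transient_suppressor.cc, earlier in
this diff). Below is a sketch of the state machine they imply; the chunk
duration and both thresholds are inferred from the test, not copied from the
source:

// Any key-press enables detection; two key-presses within one second enable
// suppression; 4 s (400 chunks of 10 ms) without a key-press disables both.
struct KeypressStateSketch {
  int chunks_since_keypress;
  bool detection_enabled;
  bool suppression_enabled;

  KeypressStateSketch()
      : chunks_since_keypress(0),
        detection_enabled(false),
        suppression_enabled(false) {}

  void Update(bool key_pressed) {
    const int kChunksPerSecond = 100;     // Inferred: 10 ms chunks.
    const int kDisableAfterChunks = 400;  // Inferred: 4 s timeout.
    if (key_pressed) {
      if (detection_enabled && chunks_since_keypress < kChunksPerSecond) {
        suppression_enabled = true;
      }
      detection_enabled = true;
      chunks_since_keypress = 0;
    } else if (++chunks_since_keypress >= kDisableAfterChunks) {
      detection_enabled = false;
      suppression_enabled = false;
    }
  }
};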

Some files were not shown because too many files have changed in this diff.