Merge audio_processing changes.
R=aluebs@webrtc.org, bjornv@webrtc.org BUG= Review URL: https://webrtc-codereview.appspot.com/32769004 git-svn-id: http://webrtc.googlecode.com/svn/trunk@7893 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
parent
fb108b5a28
commit
788acd17ad
3
.gitignore
vendored
3
.gitignore
vendored
@ -48,8 +48,7 @@
|
||||
/links.db
|
||||
/net
|
||||
/out
|
||||
/resources/*.*
|
||||
/resources/*/*.*
|
||||
/resources
|
||||
/talk/examples/android/bin
|
||||
/talk/examples/android/gen
|
||||
/talk/examples/android/libs
|
||||
|
1
resources/audio_processing/agc/agc_audio.pcm.sha1
Normal file
1
resources/audio_processing/agc/agc_audio.pcm.sha1
Normal file
@ -0,0 +1 @@
|
||||
10a52dc6d6f15242a1aa549205657f2834353673
|
@ -0,0 +1 @@
|
||||
61219028e15606a3adbbc61d393575ab36b4078b
|
1
resources/audio_processing/agc/agc_pitch_gain.dat.sha1
Normal file
1
resources/audio_processing/agc/agc_pitch_gain.dat.sha1
Normal file
@ -0,0 +1 @@
|
||||
ba0c6e93a5e6d351d95385699fb9a719b6a6d0cc
|
1
resources/audio_processing/agc/agc_pitch_lag.dat.sha1
Normal file
1
resources/audio_processing/agc/agc_pitch_lag.dat.sha1
Normal file
@ -0,0 +1 @@
|
||||
590c6fe033665d11fa70dbbbd3e7d8f0b8a616ce
|
@ -0,0 +1 @@
|
||||
3a5a28763e3ad5cd0f2833a90b685f4da97c2002
|
1
resources/audio_processing/agc/agc_vad.dat.sha1
Normal file
1
resources/audio_processing/agc/agc_vad.dat.sha1
Normal file
@ -0,0 +1 @@
|
||||
7cae05c6902812609fa23ac04037485503b0924d
|
1
resources/audio_processing/agc/agc_voicing_prob.dat.sha1
Normal file
1
resources/audio_processing/agc/agc_voicing_prob.dat.sha1
Normal file
@ -0,0 +1 @@
|
||||
b1ea860f0bfad3e86fedc43cd8752821e0d75a46
|
@ -0,0 +1 @@
|
||||
49402cfaa36be32320167a65c8e96f70548f5257
|
@ -0,0 +1 @@
|
||||
7c80af623675b2284f4081cfd2df9a0227bbc2a0
|
@ -0,0 +1 @@
|
||||
04155a7e186deb7524e3013476de3eaabd59a1f8
|
@ -0,0 +1 @@
|
||||
6c33b25be2eb9b441429aabf203d5b4a9e734c63
|
1
resources/audio_processing/transient/audio16kHz.pcm.sha1
Normal file
1
resources/audio_processing/transient/audio16kHz.pcm.sha1
Normal file
@ -0,0 +1 @@
|
||||
81cb7e547fad2894b5702fa571f9eb55ed6e1096
|
1
resources/audio_processing/transient/audio32kHz.pcm.sha1
Normal file
1
resources/audio_processing/transient/audio32kHz.pcm.sha1
Normal file
@ -0,0 +1 @@
|
||||
81cfcff6b0d70938fe74060ba0303504c31c6d7e
|
1
resources/audio_processing/transient/audio48kHz.pcm.sha1
Normal file
1
resources/audio_processing/transient/audio48kHz.pcm.sha1
Normal file
@ -0,0 +1 @@
|
||||
01278951e13675a3467782e1d2f18273c05eef50
|
1
resources/audio_processing/transient/audio8kHz.pcm.sha1
Normal file
1
resources/audio_processing/transient/audio8kHz.pcm.sha1
Normal file
@ -0,0 +1 @@
|
||||
5fcb4621ea0f50c3fc9a63e4720ff52631258437
|
@ -0,0 +1 @@
|
||||
35639dd1b73b678360897975a91a7c8af0be3644
|
@ -0,0 +1 @@
|
||||
c9d3d0b81262ffaba7d358ad534e6fcb27c00076
|
@ -0,0 +1 @@
|
||||
f46a3380c9285324e583965ef547fcaa1650f8b8
|
1
resources/audio_processing/transient/detect8kHz.dat.sha1
Normal file
1
resources/audio_processing/transient/detect8kHz.dat.sha1
Normal file
@ -0,0 +1 @@
|
||||
f625c14d134d69ad38b67295459406fc9947a705
|
@ -0,0 +1 @@
|
||||
c26083880cd227178917b4df230520dbfb9b9bb1
|
@ -0,0 +1 @@
|
||||
0eaaf21344b4b030d6c0fb6dcc419e7d3959a148
|
@ -0,0 +1 @@
|
||||
9781792dc39d7aada6418370246eef9f544ca47b
|
@ -0,0 +1 @@
|
||||
8b2bd11b591521178232aae598e6df0d001051c4
|
@ -0,0 +1 @@
|
||||
8a6c7ed696f9791f8cb5c5b061f07eb019affd49
|
1
resources/audio_processing/transient/wpd0.dat.sha1
Normal file
1
resources/audio_processing/transient/wpd0.dat.sha1
Normal file
@ -0,0 +1 @@
|
||||
7c01839f888fe6e10276e1819bd5207668345dcf
|
1
resources/audio_processing/transient/wpd1.dat.sha1
Normal file
1
resources/audio_processing/transient/wpd1.dat.sha1
Normal file
@ -0,0 +1 @@
|
||||
f7553df9abca91401715185d97d1d9c20a2ecb9b
|
1
resources/audio_processing/transient/wpd2.dat.sha1
Normal file
1
resources/audio_processing/transient/wpd2.dat.sha1
Normal file
@ -0,0 +1 @@
|
||||
0455d7042c64075e793285753a98f02268e6238b
|
1
resources/audio_processing/transient/wpd3.dat.sha1
Normal file
1
resources/audio_processing/transient/wpd3.dat.sha1
Normal file
@ -0,0 +1 @@
|
||||
941cc5d0bfccfd1d6bd68a1d882975202f22b6de
|
1
resources/audio_processing/transient/wpd4.dat.sha1
Normal file
1
resources/audio_processing/transient/wpd4.dat.sha1
Normal file
@ -0,0 +1 @@
|
||||
a16139b3750a13b62327e2a78ea008493a2b508b
|
1
resources/audio_processing/transient/wpd5.dat.sha1
Normal file
1
resources/audio_processing/transient/wpd5.dat.sha1
Normal file
@ -0,0 +1 @@
|
||||
6bf9272123656bc0561550a40734245709bbac10
|
1
resources/audio_processing/transient/wpd6.dat.sha1
Normal file
1
resources/audio_processing/transient/wpd6.dat.sha1
Normal file
@ -0,0 +1 @@
|
||||
6a2667c6c4b3794776af1dabacc3575791023168
|
1
resources/audio_processing/transient/wpd7.dat.sha1
Normal file
1
resources/audio_processing/transient/wpd7.dat.sha1
Normal file
@ -0,0 +1 @@
|
||||
620cf1f732c99003ff0e5d6ae3350c0a2ea2a9d7
|
@ -22,25 +22,52 @@ declare_args() {
|
||||
|
||||
source_set("audio_processing") {
|
||||
sources = [
|
||||
"aec/include/echo_cancellation.h",
|
||||
"aec/aec_core.c",
|
||||
"aec/aec_core.h",
|
||||
"aec/aec_core_internal.h",
|
||||
"aec/aec_rdft.c",
|
||||
"aec/aec_rdft.h",
|
||||
"aec/aec_resampler.c",
|
||||
"aec/aec_resampler.h",
|
||||
"aec/echo_cancellation.c",
|
||||
"aec/echo_cancellation_internal.h",
|
||||
"aec/aec_core.h",
|
||||
"aec/aec_core.c",
|
||||
"aec/aec_core_internal.h",
|
||||
"aec/aec_rdft.h",
|
||||
"aec/aec_rdft.c",
|
||||
"aec/aec_resampler.h",
|
||||
"aec/aec_resampler.c",
|
||||
"aecm/include/echo_control_mobile.h",
|
||||
"aecm/echo_control_mobile.c",
|
||||
"aec/include/echo_cancellation.h",
|
||||
"aecm/aecm_core.c",
|
||||
"aecm/aecm_core.h",
|
||||
"agc/include/gain_control.h",
|
||||
"aecm/echo_control_mobile.c",
|
||||
"aecm/include/echo_control_mobile.h",
|
||||
"agc/agc.cc",
|
||||
"agc/agc.h",
|
||||
"agc/agc_audio_proc.cc",
|
||||
"agc/agc_audio_proc.h",
|
||||
"agc/agc_audio_proc_internal.h",
|
||||
"agc/agc_manager_direct.cc",
|
||||
"agc/agc_manager_direct.h",
|
||||
"agc/analog_agc.c",
|
||||
"agc/analog_agc.h",
|
||||
"agc/circular_buffer.cc",
|
||||
"agc/circular_buffer.h",
|
||||
"agc/common.h",
|
||||
"agc/digital_agc.c",
|
||||
"agc/digital_agc.h",
|
||||
"agc/gain_map_internal.h",
|
||||
"agc/gmm.cc",
|
||||
"agc/gmm.h",
|
||||
"agc/histogram.cc",
|
||||
"agc/histogram.h",
|
||||
"agc/include/gain_control.h",
|
||||
"agc/noise_gmm_tables.h",
|
||||
"agc/pitch_based_vad.cc",
|
||||
"agc/pitch_based_vad.h",
|
||||
"agc/pitch_internal.cc",
|
||||
"agc/pitch_internal.h",
|
||||
"agc/pole_zero_filter.cc",
|
||||
"agc/pole_zero_filter.h",
|
||||
"agc/standalone_vad.cc",
|
||||
"agc/standalone_vad.h",
|
||||
"agc/utility.cc",
|
||||
"agc/utility.h",
|
||||
"agc/voice_gmm_tables.h",
|
||||
"audio_buffer.cc",
|
||||
"audio_buffer.h",
|
||||
"audio_processing_impl.cc",
|
||||
@ -67,6 +94,19 @@ source_set("audio_processing") {
|
||||
"rms_level.h",
|
||||
"splitting_filter.cc",
|
||||
"splitting_filter.h",
|
||||
"transient/common.h",
|
||||
"transient/daubechies_8_wavelet_coeffs.h",
|
||||
"transient/dyadic_decimator.h",
|
||||
"transient/moving_moments.cc",
|
||||
"transient/moving_moments.h",
|
||||
"transient/transient_detector.cc",
|
||||
"transient/transient_detector.h",
|
||||
"transient/transient_suppressor.cc",
|
||||
"transient/transient_suppressor.h",
|
||||
"transient/wpd_node.cc",
|
||||
"transient/wpd_node.h",
|
||||
"transient/wpd_tree.cc",
|
||||
"transient/wpd_tree.h",
|
||||
"typing_detection.cc",
|
||||
"typing_detection.h",
|
||||
"utility/delay_estimator.c",
|
||||
|
161
webrtc/modules/audio_processing/agc/agc.cc
Normal file
161
webrtc/modules/audio_processing/agc/agc.cc
Normal file
@ -0,0 +1,161 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/agc/agc.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <cstdlib>
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "webrtc/common_audio/resampler/include/resampler.h"
|
||||
#include "webrtc/modules/audio_processing/agc/agc_audio_proc.h"
|
||||
#include "webrtc/modules/audio_processing/agc/common.h"
|
||||
#include "webrtc/modules/audio_processing/agc/histogram.h"
|
||||
#include "webrtc/modules/audio_processing/agc/pitch_based_vad.h"
|
||||
#include "webrtc/modules/audio_processing/agc/standalone_vad.h"
|
||||
#include "webrtc/modules/audio_processing/agc/utility.h"
|
||||
#include "webrtc/modules/interface/module_common_types.h"
|
||||
#include "webrtc/system_wrappers/interface/compile_assert.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {

// Number of histogram updates that must accumulate before GetRmsErrorDb()
// reports an error value; also passed to Histogram::Create().
const int kNumAnalysisFrames = 100;

// GetRmsErrorDb() reports nothing while the histogram's audio content is below
// this fraction of |kNumAnalysisFrames|.
const double kActivityThreshold = 0.3;

// Target level (in dBFS) the Agc starts out with.
const int kDefaultLevelDbfs = -18;

// Value voice_probability() returns until a frame has been classified.
const double kDefaultVoiceValue = 1.0;

}  // namespace
|
||||
|
||||
// Builds an Agc with the default target level and the standalone VAD enabled.
// All helper objects are heap-allocated here and owned through scoped_ptr
// members, so the destructor has nothing to release explicitly.
Agc::Agc()
    : target_level_loudness_(Dbfs2Loudness(kDefaultLevelDbfs)),
      last_voice_probability_(kDefaultVoiceValue),
      target_level_dbfs_(kDefaultLevelDbfs),
      standalone_vad_enabled_(true),
      histogram_(Histogram::Create(kNumAnalysisFrames)),
      inactive_histogram_(Histogram::Create()),
      audio_processing_(new AgcAudioProc()),
      pitch_based_vad_(new PitchBasedVad()),
      standalone_vad_(StandaloneVad::Create()),
      // 32 kHz input is the most common resampling situation, so start there;
      // Process() reconfigures the resampler when another rate shows up.
      resampler_(new Resampler(32000, kSampleRateHz, kResamplerSynchronous)) {
}

Agc::~Agc() {
}
|
||||
|
||||
// Returns the fraction of the |length| samples in |audio| that sit at either
// extreme of the int16_t range (32767 or -32768), i.e. samples that are
// presumably clipped.
//
// |length| is expected to be positive. A non-positive |length| trips the
// assert in debug builds and yields 0 in release builds.
float Agc::AnalyzePreproc(const int16_t* audio, int length) {
  assert(length > 0);
  // With asserts compiled out, a zero |length| would otherwise evaluate
  // 0.0f / 0 and hand NaN back to the caller.
  if (length <= 0)
    return 0.0f;

  int num_clipped = 0;
  for (int i = 0; i < length; ++i) {
    if (audio[i] == 32767 || audio[i] == -32768)
      ++num_clipped;
  }
  return 1.0f * num_clipped / length;
}
|
||||
|
||||
int Agc::Process(const int16_t* audio, int length, int sample_rate_hz) {
|
||||
assert(length == sample_rate_hz / 100);
|
||||
if (sample_rate_hz > 32000) {
|
||||
return -1;
|
||||
}
|
||||
// Resample to the required rate.
|
||||
int16_t resampled[kLength10Ms];
|
||||
const int16_t* resampled_ptr = audio;
|
||||
if (sample_rate_hz != kSampleRateHz) {
|
||||
if (resampler_->ResetIfNeeded(sample_rate_hz,
|
||||
kSampleRateHz,
|
||||
kResamplerSynchronous) != 0) {
|
||||
return -1;
|
||||
}
|
||||
resampler_->Push(audio, length, resampled, kLength10Ms, length);
|
||||
resampled_ptr = resampled;
|
||||
}
|
||||
assert(length == kLength10Ms);
|
||||
|
||||
if (standalone_vad_enabled_) {
|
||||
if (standalone_vad_->AddAudio(resampled_ptr, length) != 0)
|
||||
return -1;
|
||||
}
|
||||
|
||||
AudioFeatures features;
|
||||
audio_processing_->ExtractFeatures(resampled_ptr, length, &features);
|
||||
if (features.num_frames > 0) {
|
||||
if (features.silence) {
|
||||
// The other features are invalid, so update the histogram with an
|
||||
// arbitrary low value.
|
||||
for (int n = 0; n < features.num_frames; ++n)
|
||||
histogram_->Update(features.rms[n], 0.01);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Initialize to 0.5 which is a neutral value for combining probabilities,
|
||||
// in case the standalone-VAD is not enabled.
|
||||
double p_combined[] = {0.5, 0.5, 0.5, 0.5};
|
||||
COMPILE_ASSERT(sizeof(p_combined) / sizeof(p_combined[0]) == kMaxNumFrames,
|
||||
combined_probability_incorrect_size);
|
||||
if (standalone_vad_enabled_) {
|
||||
if (standalone_vad_->GetActivity(p_combined, kMaxNumFrames) < 0)
|
||||
return -1;
|
||||
}
|
||||
// If any other VAD is enabled it must be combined before calling the
|
||||
// pitch-based VAD.
|
||||
if (pitch_based_vad_->VoicingProbability(features, p_combined) < 0)
|
||||
return -1;
|
||||
for (int n = 0; n < features.num_frames; n++) {
|
||||
histogram_->Update(features.rms[n], p_combined[n]);
|
||||
last_voice_probability_ = p_combined[n];
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool Agc::GetRmsErrorDb(int* error) {
|
||||
if (!error) {
|
||||
assert(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (histogram_->num_updates() < kNumAnalysisFrames) {
|
||||
// We haven't yet received enough frames.
|
||||
return false;
|
||||
}
|
||||
|
||||
if (histogram_->AudioContent() < kNumAnalysisFrames * kActivityThreshold) {
|
||||
// We are likely in an inactive segment.
|
||||
return false;
|
||||
}
|
||||
|
||||
double loudness = Linear2Loudness(histogram_->CurrentRms());
|
||||
*error = std::floor(Loudness2Db(target_level_loudness_ - loudness) + 0.5);
|
||||
histogram_->Reset();
|
||||
return true;
|
||||
}
|
||||
|
||||
void Agc::Reset() {
|
||||
histogram_->Reset();
|
||||
}
|
||||
|
||||
// Sets the target level to |level| dBFS and caches the matching loudness.
// Only values strictly between -100 and 0 are accepted.
// Returns 0 on success, or -1 (changing nothing) when |level| is out of range.
int Agc::set_target_level_dbfs(int level) {
  // TODO(turajs): just some arbitrary sanity check. We can come up with better
  // limits. The upper limit should be chosen such that the risk of clipping is
  // low. The lower limit should not result in a too quiet signal.
  const bool level_is_sane = (level < 0) && (level > -100);
  if (!level_is_sane)
    return -1;

  target_level_loudness_ = Dbfs2Loudness(level);
  target_level_dbfs_ = level;
  return 0;
}
|
||||
|
||||
void Agc::EnableStandaloneVad(bool enable) {
|
||||
standalone_vad_enabled_ = enable;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
69
webrtc/modules/audio_processing/agc/agc.h
Normal file
69
webrtc/modules/audio_processing/agc/agc.h
Normal file
@ -0,0 +1,69 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_H_
|
||||
|
||||
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class AudioFrame;
|
||||
class AgcAudioProc;
|
||||
class Histogram;
|
||||
class PitchBasedVad;
|
||||
class Resampler;
|
||||
class StandaloneVad;
|
||||
|
||||
class Agc {
|
||||
public:
|
||||
Agc();
|
||||
virtual ~Agc();
|
||||
|
||||
// Returns the proportion of samples in the buffer which are at full-scale
|
||||
// (and presumably clipped).
|
||||
virtual float AnalyzePreproc(const int16_t* audio, int length);
|
||||
// |audio| must be mono; in a multi-channel stream, provide the first (usually
|
||||
// left) channel.
|
||||
virtual int Process(const int16_t* audio, int length, int sample_rate_hz);
|
||||
|
||||
// Retrieves the difference between the target RMS level and the current
|
||||
// signal RMS level in dB. Returns true if an update is available and false
|
||||
// otherwise, in which case |error| should be ignored and no action taken.
|
||||
virtual bool GetRmsErrorDb(int* error);
|
||||
virtual void Reset();
|
||||
|
||||
virtual int set_target_level_dbfs(int level);
|
||||
virtual int target_level_dbfs() const { return target_level_dbfs_; }
|
||||
|
||||
virtual void EnableStandaloneVad(bool enable);
|
||||
virtual bool standalone_vad_enabled() const {
|
||||
return standalone_vad_enabled_;
|
||||
}
|
||||
|
||||
virtual double voice_probability() const { return last_voice_probability_; }
|
||||
|
||||
private:
|
||||
double target_level_loudness_;
|
||||
double last_voice_probability_;
|
||||
int target_level_dbfs_;
|
||||
bool standalone_vad_enabled_;
|
||||
scoped_ptr<Histogram> histogram_;
|
||||
scoped_ptr<Histogram> inactive_histogram_;
|
||||
scoped_ptr<AgcAudioProc> audio_processing_;
|
||||
scoped_ptr<PitchBasedVad> pitch_based_vad_;
|
||||
scoped_ptr<StandaloneVad> standalone_vad_;
|
||||
scoped_ptr<Resampler> resampler_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_H_
|
270
webrtc/modules/audio_processing/agc/agc_audio_proc.cc
Normal file
270
webrtc/modules/audio_processing/agc/agc_audio_proc.cc
Normal file
@ -0,0 +1,270 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/agc/agc_audio_proc.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "webrtc/modules/audio_processing/agc/agc_audio_proc_internal.h"
|
||||
#include "webrtc/modules/audio_processing/agc/pitch_internal.h"
|
||||
#include "webrtc/modules/audio_processing/agc/pole_zero_filter.h"
|
||||
extern "C" {
|
||||
#include "webrtc/modules/audio_coding/codecs/isac/main/source/codec.h"
|
||||
#include "webrtc/modules/audio_coding/codecs/isac/main/source/lpc_analysis.h"
|
||||
#include "webrtc/modules/audio_coding/codecs/isac/main/source/pitch_estimator.h"
|
||||
#include "webrtc/modules/audio_coding/codecs/isac/main/source/structs.h"
|
||||
#include "webrtc/modules/audio_processing/utility/fft4g.h"
|
||||
}
|
||||
#include "webrtc/modules/interface/module_common_types.h"
|
||||
#include "webrtc/system_wrappers/interface/compile_assert.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// The following structures are declared anonymous in iSAC's structs.h. To
|
||||
// forward declare them, we use this derived class trick.
|
||||
struct AgcAudioProc::PitchAnalysisStruct : public ::PitchAnalysisStruct {};
|
||||
struct AgcAudioProc::PreFiltBankstr : public ::PreFiltBankstr {};
|
||||
|
||||
static const float kFrequencyResolution = kSampleRateHz /
|
||||
static_cast<float>(AgcAudioProc::kDftSize);
|
||||
static const int kSilenceRms = 5;
|
||||
|
||||
// TODO(turajs): Make a Create or Init for AgcAudioProc.
|
||||
AgcAudioProc::AgcAudioProc()
|
||||
: audio_buffer_(),
|
||||
num_buffer_samples_(kNumPastSignalSamples),
|
||||
log_old_gain_(-2),
|
||||
old_lag_(50), // Arbitrary but valid as pitch-lag (in samples).
|
||||
pitch_analysis_handle_(new PitchAnalysisStruct),
|
||||
pre_filter_handle_(new PreFiltBankstr),
|
||||
high_pass_filter_(PoleZeroFilter::Create(
|
||||
kCoeffNumerator, kFilterOrder, kCoeffDenominator, kFilterOrder)) {
|
||||
COMPILE_ASSERT(kNumPastSignalSamples + kNumSubframeSamples ==
|
||||
sizeof(kLpcAnalWin) / sizeof(kLpcAnalWin[0]),
|
||||
lpc_analysis_window_incorrect_size);
|
||||
COMPILE_ASSERT(kLpcOrder + 1 == sizeof(kCorrWeight) / sizeof(kCorrWeight[0]),
|
||||
correlation_weight_incorrect_size);
|
||||
|
||||
// TODO(turajs): Are we doing too much in the constructor?
|
||||
float data[kDftSize];
|
||||
// Make FFT to initialize.
|
||||
ip_[0] = 0;
|
||||
WebRtc_rdft(kDftSize, 1, data, ip_, w_fft_);
|
||||
// TODO(turajs): Need to initialize high-pass filter.
|
||||
|
||||
// Initialize iSAC components.
|
||||
WebRtcIsac_InitPreFilterbank(pre_filter_handle_.get());
|
||||
WebRtcIsac_InitPitchAnalysis(pitch_analysis_handle_.get());
|
||||
}
|
||||
|
||||
AgcAudioProc::~AgcAudioProc() {}
|
||||
|
||||
// Moves the |kNumPastSignalSamples| samples that start at index
// |kNumSamplesToProcess| to the front of the buffer, where they act as the
// "past signal" of the next round, and marks only those samples as buffered.
void AgcAudioProc::ResetBuffer() {
  const float* const tail = audio_buffer_ + kNumSamplesToProcess;
  const size_t num_bytes = kNumPastSignalSamples * sizeof(audio_buffer_[0]);
  memcpy(&audio_buffer_[0], tail, num_bytes);
  num_buffer_samples_ = kNumPastSignalSamples;
}
|
||||
|
||||
int AgcAudioProc::ExtractFeatures(const int16_t* frame,
|
||||
int length,
|
||||
AudioFeatures* features) {
|
||||
features->num_frames = 0;
|
||||
if (length != kNumSubframeSamples) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// High-pass filter to remove the DC component and very low frequency content.
|
||||
// We have experienced that this high-pass filtering improves voice/non-voiced
|
||||
// classification.
|
||||
if (high_pass_filter_->Filter(frame, kNumSubframeSamples,
|
||||
&audio_buffer_[num_buffer_samples_]) != 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
num_buffer_samples_ += kNumSubframeSamples;
|
||||
if (num_buffer_samples_ < kBufferLength) {
|
||||
return 0;
|
||||
}
|
||||
assert(num_buffer_samples_ == kBufferLength);
|
||||
features->num_frames = kNum10msSubframes;
|
||||
features->silence = false;
|
||||
|
||||
Rms(features->rms, kMaxNumFrames);
|
||||
for (int i = 0; i < kNum10msSubframes; ++i) {
|
||||
if (features->rms[i] < kSilenceRms) {
|
||||
// PitchAnalysis can cause NaNs in the pitch gain if it's fed silence.
|
||||
// Bail out here instead.
|
||||
features->silence = true;
|
||||
ResetBuffer();
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
PitchAnalysis(features->log_pitch_gain, features->pitch_lag_hz,
|
||||
kMaxNumFrames);
|
||||
FindFirstSpectralPeaks(features->spectral_peak, kMaxNumFrames);
|
||||
ResetBuffer();
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Computes |kLpcOrder + 1| correlation coefficients.
|
||||
void AgcAudioProc::SubframeCorrelation(double* corr, int length_corr,
|
||||
int subframe_index) {
|
||||
assert(length_corr >= kLpcOrder + 1);
|
||||
double windowed_audio[kNumSubframeSamples + kNumPastSignalSamples];
|
||||
int buffer_index = subframe_index * kNumSubframeSamples;
|
||||
|
||||
for (int n = 0; n < kNumSubframeSamples + kNumPastSignalSamples; n++)
|
||||
windowed_audio[n] = audio_buffer_[buffer_index++] * kLpcAnalWin[n];
|
||||
|
||||
WebRtcIsac_AutoCorr(corr, windowed_audio, kNumSubframeSamples +
|
||||
kNumPastSignalSamples, kLpcOrder);
|
||||
}
|
||||
|
||||
// Compute |kNum10msSubframes| sets of LPC coefficients, one per 10 ms input.
|
||||
// The analysis window is 15 ms long and it is centered on the first half of
|
||||
// each 10ms sub-frame. This is equivalent to computing LPC coefficients for the
|
||||
// first half of each 10 ms subframe.
|
||||
void AgcAudioProc::GetLpcPolynomials(double* lpc, int length_lpc) {
|
||||
assert(length_lpc >= kNum10msSubframes * (kLpcOrder + 1));
|
||||
double corr[kLpcOrder + 1];
|
||||
double reflec_coeff[kLpcOrder];
|
||||
for (int i = 0, offset_lpc = 0; i < kNum10msSubframes;
|
||||
i++, offset_lpc += kLpcOrder + 1) {
|
||||
SubframeCorrelation(corr, kLpcOrder + 1, i);
|
||||
corr[0] *= 1.0001;
|
||||
// This makes Lev-Durb a bit more stable.
|
||||
for (int k = 0; k < kLpcOrder + 1; k++) {
|
||||
corr[k] *= kCorrWeight[k];
|
||||
}
|
||||
WebRtcIsac_LevDurb(&lpc[offset_lpc], reflec_coeff, corr, kLpcOrder);
|
||||
}
|
||||
}
|
||||
|
||||
// Fits a parabola through the reciprocals of three consecutive values and
// returns the position of its extremum as an offset relative to the middle
// point. The interpolation is therefore carried out in |1 / A(z)|^2.
static float QuadraticInterpolation(float prev_val, float curr_val,
                                    float next_val) {
  const float inv_next = 1.0f / next_val;
  const float inv_prev = 1.0f / prev_val;
  const float inv_curr = 1.0f / curr_val;

  const float slope_term = -(inv_next - inv_prev) * 0.5f;
  const float curvature_term = inv_next + inv_prev - 2.f * inv_curr;
  const float offset = slope_term / curvature_term;
  // The extremum must lie within one bin of the middle point.
  assert(fabs(offset) < 1);
  return offset;
}
|
||||
|
||||
// 1 / A(z), where A(z) is defined by |lpc| is a model of the spectral envelope
|
||||
// of the input signal. The local maximum of the spectral envelope corresponds
|
||||
// with the local minimum of A(z). It saves complexity, as we save one
|
||||
// inversion. Furthermore, we find the first local maximum of magnitude squared,
|
||||
// to save on one square root.
|
||||
void AgcAudioProc::FindFirstSpectralPeaks(double* f_peak, int length_f_peak) {
|
||||
assert(length_f_peak >= kNum10msSubframes);
|
||||
double lpc[kNum10msSubframes * (kLpcOrder + 1)];
|
||||
// For all sub-frames.
|
||||
GetLpcPolynomials(lpc, kNum10msSubframes * (kLpcOrder + 1));
|
||||
|
||||
const int kNumDftCoefficients = kDftSize / 2 + 1;
|
||||
float data[kDftSize];
|
||||
|
||||
for (int i = 0; i < kNum10msSubframes; i++) {
|
||||
// Convert to float with zero pad.
|
||||
memset(data, 0, sizeof(data));
|
||||
for (int n = 0; n < kLpcOrder + 1; n++) {
|
||||
data[n] = static_cast<float>(lpc[i * (kLpcOrder + 1) + n]);
|
||||
}
|
||||
// Transform to frequency domain.
|
||||
WebRtc_rdft(kDftSize, 1, data, ip_, w_fft_);
|
||||
|
||||
int index_peak = 0;
|
||||
float prev_magn_sqr = data[0] * data[0];
|
||||
float curr_magn_sqr = data[2] * data[2] + data[3] * data[3];
|
||||
float next_magn_sqr;
|
||||
bool found_peak = false;
|
||||
for (int n = 2; n < kNumDftCoefficients - 1; n++) {
|
||||
next_magn_sqr = data[2 * n] * data[2 * n] +
|
||||
data[2 * n + 1] * data[2 * n + 1];
|
||||
if (curr_magn_sqr < prev_magn_sqr && curr_magn_sqr < next_magn_sqr) {
|
||||
found_peak = true;
|
||||
index_peak = n - 1;
|
||||
break;
|
||||
}
|
||||
prev_magn_sqr = curr_magn_sqr;
|
||||
curr_magn_sqr = next_magn_sqr;
|
||||
}
|
||||
float fractional_index = 0;
|
||||
if (!found_peak) {
|
||||
// Checking if |kNumDftCoefficients - 1| is the local minimum.
|
||||
next_magn_sqr = data[1] * data[1];
|
||||
if (curr_magn_sqr < prev_magn_sqr && curr_magn_sqr < next_magn_sqr) {
|
||||
index_peak = kNumDftCoefficients - 1;
|
||||
}
|
||||
} else {
|
||||
// A peak is found, do a simple quadratic interpolation to get a more
|
||||
// accurate estimate of the peak location.
|
||||
fractional_index = QuadraticInterpolation(prev_magn_sqr, curr_magn_sqr,
|
||||
next_magn_sqr);
|
||||
}
|
||||
f_peak[i] = (index_peak + fractional_index) * kFrequencyResolution;
|
||||
}
|
||||
}
|
||||
|
||||
// Using iSAC functions to estimate pitch gains & lags.
|
||||
void AgcAudioProc::PitchAnalysis(double* log_pitch_gains, double* pitch_lags_hz,
|
||||
int length) {
|
||||
// TODO(turajs): This can be "imported" from iSAC & and the next two
|
||||
// constants.
|
||||
assert(length >= kNum10msSubframes);
|
||||
const int kNumPitchSubframes = 4;
|
||||
double gains[kNumPitchSubframes];
|
||||
double lags[kNumPitchSubframes];
|
||||
|
||||
const int kNumSubbandFrameSamples = 240;
|
||||
const int kNumLookaheadSamples = 24;
|
||||
|
||||
float lower[kNumSubbandFrameSamples];
|
||||
float upper[kNumSubbandFrameSamples];
|
||||
double lower_lookahead[kNumSubbandFrameSamples];
|
||||
double upper_lookahead[kNumSubbandFrameSamples];
|
||||
double lower_lookahead_pre_filter[kNumSubbandFrameSamples +
|
||||
kNumLookaheadSamples];
|
||||
|
||||
// Split signal to lower and upper bands
|
||||
WebRtcIsac_SplitAndFilterFloat(&audio_buffer_[kNumPastSignalSamples],
|
||||
lower, upper, lower_lookahead, upper_lookahead,
|
||||
pre_filter_handle_.get());
|
||||
WebRtcIsac_PitchAnalysis(lower_lookahead, lower_lookahead_pre_filter,
|
||||
pitch_analysis_handle_.get(), lags, gains);
|
||||
|
||||
// Lags are computed on lower-band signal with sampling rate half of the
|
||||
// input signal.
|
||||
GetSubframesPitchParameters(kSampleRateHz / 2, gains, lags,
|
||||
kNumPitchSubframes, kNum10msSubframes,
|
||||
&log_old_gain_, &old_lag_,
|
||||
log_pitch_gains, pitch_lags_hz);
|
||||
}
|
||||
|
||||
void AgcAudioProc::Rms(double* rms, int length_rms) {
|
||||
assert(length_rms >= kNum10msSubframes);
|
||||
int offset = kNumPastSignalSamples;
|
||||
for (int i = 0; i < kNum10msSubframes; i++) {
|
||||
rms[i] = 0;
|
||||
for (int n = 0; n < kNumSubframeSamples; n++, offset++)
|
||||
rms[i] += audio_buffer_[offset] * audio_buffer_[offset];
|
||||
rms[i] = sqrt(rms[i] / kNumSubframeSamples);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
83
webrtc/modules/audio_processing/agc/agc_audio_proc.h
Normal file
83
webrtc/modules/audio_processing/agc/agc_audio_proc.h
Normal file
@ -0,0 +1,83 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_AUDIO_PROC_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_AUDIO_PROC_H_
|
||||
|
||||
#include "webrtc/modules/audio_processing/agc/common.h"
|
||||
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class AudioFrame;
|
||||
class PoleZeroFilter;
|
||||
|
||||
class AgcAudioProc {
|
||||
public:
|
||||
// Forward declare iSAC structs.
|
||||
struct PitchAnalysisStruct;
|
||||
struct PreFiltBankstr;
|
||||
|
||||
AgcAudioProc();
|
||||
~AgcAudioProc();
|
||||
|
||||
int ExtractFeatures(const int16_t* audio_frame,
|
||||
int length,
|
||||
AudioFeatures* audio_features);
|
||||
|
||||
static const int kDftSize = 512;
|
||||
|
||||
private:
|
||||
void PitchAnalysis(double* pitch_gains, double* pitch_lags_hz, int length);
|
||||
void SubframeCorrelation(double* corr, int lenght_corr, int subframe_index);
|
||||
void GetLpcPolynomials(double* lpc, int length_lpc);
|
||||
void FindFirstSpectralPeaks(double* f_peak, int length_f_peak);
|
||||
void Rms(double* rms, int length_rms);
|
||||
void ResetBuffer();
|
||||
|
||||
// To compute spectral peak we perform LPC analysis to get spectral envelope.
|
||||
// For every 30 ms we compute 3 spectral peak there for 3 LPC analysis.
|
||||
// LPC is computed over 15 ms of windowed audio. For every 10 ms sub-frame
|
||||
// we need 5 ms of past signal to create the input of LPC analysis.
|
||||
static const int kNumPastSignalSamples = kSampleRateHz / 200;
|
||||
|
||||
// TODO(turajs): maybe defining this at a higher level (maybe enum) so that
|
||||
// all the code recognize it as "no-error."
|
||||
static const int kNoError = 0;
|
||||
|
||||
static const int kNum10msSubframes = 3;
|
||||
static const int kNumSubframeSamples = kSampleRateHz / 100;
|
||||
static const int kNumSamplesToProcess = kNum10msSubframes *
|
||||
kNumSubframeSamples; // Samples in 30 ms @ given sampling rate.
|
||||
static const int kBufferLength = kNumPastSignalSamples + kNumSamplesToProcess;
|
||||
static const int kIpLength = kDftSize >> 1;
|
||||
static const int kWLength = kDftSize >> 1;
|
||||
|
||||
static const int kLpcOrder = 16;
|
||||
|
||||
int ip_[kIpLength];
|
||||
float w_fft_[kWLength];
|
||||
|
||||
// A buffer of 5 ms (past audio) + 30 ms (one iSAC frame ).
|
||||
float audio_buffer_[kBufferLength];
|
||||
int num_buffer_samples_;
|
||||
|
||||
double log_old_gain_;
|
||||
double old_lag_;
|
||||
|
||||
scoped_ptr<PitchAnalysisStruct> pitch_analysis_handle_;
|
||||
scoped_ptr<PreFiltBankstr> pre_filter_handle_;
|
||||
scoped_ptr<PoleZeroFilter> high_pass_filter_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_AUDIO_PROC_H_
|
@ -0,0 +1,81 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_AUDIO_PROC_INTERNAL_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_AUDIO_PROC_INTERNAL_H_
|
||||
|
||||
#include "webrtc/system_wrappers/interface/compile_assert.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// These values should match MATLAB counterparts for unit-tests to pass.
|
||||
static const double kCorrWeight[] = {
|
||||
1.000000, 0.985000, 0.970225, 0.955672, 0.941337, 0.927217, 0.913308,
|
||||
0.899609, 0.886115, 0.872823, 0.859730, 0.846834, 0.834132, 0.821620,
|
||||
0.809296, 0.797156, 0.785199
|
||||
};
|
||||
|
||||
static const double kLpcAnalWin[] = {
|
||||
0.00000000, 0.01314436, 0.02628645, 0.03942400, 0.05255473, 0.06567639,
|
||||
0.07878670, 0.09188339, 0.10496421, 0.11802689, 0.13106918, 0.14408883,
|
||||
0.15708358, 0.17005118, 0.18298941, 0.19589602, 0.20876878, 0.22160547,
|
||||
0.23440387, 0.24716177, 0.25987696, 0.27254725, 0.28517045, 0.29774438,
|
||||
0.31026687, 0.32273574, 0.33514885, 0.34750406, 0.35979922, 0.37203222,
|
||||
0.38420093, 0.39630327, 0.40833713, 0.42030043, 0.43219112, 0.44400713,
|
||||
0.45574642, 0.46740697, 0.47898676, 0.49048379, 0.50189608, 0.51322164,
|
||||
0.52445853, 0.53560481, 0.54665854, 0.55761782, 0.56848075, 0.57924546,
|
||||
0.58991008, 0.60047278, 0.61093173, 0.62128512, 0.63153117, 0.64166810,
|
||||
0.65169416, 0.66160761, 0.67140676, 0.68108990, 0.69065536, 0.70010148,
|
||||
0.70942664, 0.71862923, 0.72770765, 0.73666033, 0.74548573, 0.75418233,
|
||||
0.76274862, 0.77118312, 0.77948437, 0.78765094, 0.79568142, 0.80357442,
|
||||
0.81132858, 0.81894256, 0.82641504, 0.83374472, 0.84093036, 0.84797069,
|
||||
0.85486451, 0.86161063, 0.86820787, 0.87465511, 0.88095122, 0.88709512,
|
||||
0.89308574, 0.89892206, 0.90460306, 0.91012776, 0.91549520, 0.92070447,
|
||||
0.92575465, 0.93064488, 0.93537432, 0.93994213, 0.94434755, 0.94858979,
|
||||
0.95266814, 0.95658189, 0.96033035, 0.96391289, 0.96732888, 0.97057773,
|
||||
0.97365889, 0.97657181, 0.97931600, 0.98189099, 0.98429632, 0.98653158,
|
||||
0.98859639, 0.99049038, 0.99221324, 0.99376466, 0.99514438, 0.99635215,
|
||||
0.99738778, 0.99825107, 0.99894188, 0.99946010, 0.99980562, 0.99997840,
|
||||
0.99997840, 0.99980562, 0.99946010, 0.99894188, 0.99825107, 0.99738778,
|
||||
0.99635215, 0.99514438, 0.99376466, 0.99221324, 0.99049038, 0.98859639,
|
||||
0.98653158, 0.98429632, 0.98189099, 0.97931600, 0.97657181, 0.97365889,
|
||||
0.97057773, 0.96732888, 0.96391289, 0.96033035, 0.95658189, 0.95266814,
|
||||
0.94858979, 0.94434755, 0.93994213, 0.93537432, 0.93064488, 0.92575465,
|
||||
0.92070447, 0.91549520, 0.91012776, 0.90460306, 0.89892206, 0.89308574,
|
||||
0.88709512, 0.88095122, 0.87465511, 0.86820787, 0.86161063, 0.85486451,
|
||||
0.84797069, 0.84093036, 0.83374472, 0.82641504, 0.81894256, 0.81132858,
|
||||
0.80357442, 0.79568142, 0.78765094, 0.77948437, 0.77118312, 0.76274862,
|
||||
0.75418233, 0.74548573, 0.73666033, 0.72770765, 0.71862923, 0.70942664,
|
||||
0.70010148, 0.69065536, 0.68108990, 0.67140676, 0.66160761, 0.65169416,
|
||||
0.64166810, 0.63153117, 0.62128512, 0.61093173, 0.60047278, 0.58991008,
|
||||
0.57924546, 0.56848075, 0.55761782, 0.54665854, 0.53560481, 0.52445853,
|
||||
0.51322164, 0.50189608, 0.49048379, 0.47898676, 0.46740697, 0.45574642,
|
||||
0.44400713, 0.43219112, 0.42030043, 0.40833713, 0.39630327, 0.38420093,
|
||||
0.37203222, 0.35979922, 0.34750406, 0.33514885, 0.32273574, 0.31026687,
|
||||
0.29774438, 0.28517045, 0.27254725, 0.25987696, 0.24716177, 0.23440387,
|
||||
0.22160547, 0.20876878, 0.19589602, 0.18298941, 0.17005118, 0.15708358,
|
||||
0.14408883, 0.13106918, 0.11802689, 0.10496421, 0.09188339, 0.07878670,
|
||||
0.06567639, 0.05255473, 0.03942400, 0.02628645, 0.01314436, 0.00000000
|
||||
};
|
||||
|
||||
static const int kFilterOrder = 2;
|
||||
static const float kCoeffNumerator[kFilterOrder + 1] = {0.974827f, -1.949650f,
|
||||
0.974827f};
|
||||
static const float kCoeffDenominator[kFilterOrder + 1] = {1.0f, -1.971999f,
|
||||
0.972457f};
|
||||
|
||||
COMPILE_ASSERT(kFilterOrder + 1 == sizeof(kCoeffNumerator) /
|
||||
sizeof(kCoeffNumerator[0]), numerator_coefficients_incorrect_size);
|
||||
COMPILE_ASSERT(kFilterOrder + 1 == sizeof(kCoeffDenominator) /
|
||||
sizeof(kCoeffDenominator[0]), denominator_coefficients_incorrect_size);
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_AUDIO_PROC_INTERNAL_H_
|
@ -0,0 +1,61 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
// We don't test the value of pitch gain and lags as they are created by iSAC
|
||||
// routines. However, interpolation of pitch-gain and lags is in a separate
|
||||
// class and has its own unit-test.
|
||||
|
||||
#include "webrtc/modules/audio_processing/agc/agc_audio_proc.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "webrtc/modules/audio_processing/agc/common.h"
|
||||
#include "webrtc/modules/interface/module_common_types.h"
|
||||
#include "webrtc/test/testsupport/fileutils.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Streams the reference recording through AgcAudioProc in 10 ms chunks and,
// whenever features are produced, checks every extracted spectral peak against
// the corresponding value stored in the reference file.
TEST(AudioProcessingTest, DISABLED_ComputingFirstSpectralPeak) {
  AgcAudioProc audioproc;

  // Reference spectral peaks, read below as raw doubles.
  const std::string reference_path =
      test::ResourcePath("audio_processing/agc/agc_spectral_peak", "dat");
  FILE* peak_file = fopen(reference_path.c_str(), "rb");
  ASSERT_TRUE(peak_file != NULL);

  // Input audio, read below as raw 16-bit samples.
  const std::string audio_path =
      test::ResourcePath("audio_processing/agc/agc_audio", "pcm");
  FILE* pcm_file = fopen(audio_path.c_str(), "rb");
  ASSERT_TRUE(pcm_file != NULL);

  // Each iteration consumes 10 ms of audio.
  const size_t kDataLength = kLength10Ms;
  int16_t data[kDataLength] = { 0 };
  double sp[kMaxNumFrames];
  AudioFeatures features;
  for (;;) {
    const size_t samples_read =
        fread(data, sizeof(int16_t), kDataLength, pcm_file);
    if (samples_read != kDataLength) {
      break;  // Stop at the first short read (end of the recording).
    }
    audioproc.ExtractFeatures(data, kDataLength, &features);
    if (features.num_frames > 0) {
      ASSERT_LT(features.num_frames, kMaxNumFrames);
      // Fetch one reference value per produced frame.
      const size_t num_frames = features.num_frames;
      const size_t values_read =
          fread(sp, sizeof(sp[0]), num_frames, peak_file);
      ASSERT_EQ(num_frames, values_read);
      for (int n = 0; n < features.num_frames; n++) {
        EXPECT_NEAR(features.spectral_peak[n], sp[n], 3);
      }
    }
  }

  fclose(peak_file);
  fclose(pcm_file);
}
|
||||
|
||||
} // namespace webrtc
|
436
webrtc/modules/audio_processing/agc/agc_manager_direct.cc
Normal file
436
webrtc/modules/audio_processing/agc/agc_manager_direct.cc
Normal file
@ -0,0 +1,436 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/agc/agc_manager_direct.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
|
||||
#ifdef WEBRTC_AGC_DEBUG_DUMP
|
||||
#include <cstdio>
|
||||
#endif
|
||||
|
||||
#include "webrtc/modules/audio_processing/agc/gain_map_internal.h"
|
||||
#include "webrtc/modules/audio_processing/gain_control_impl.h"
|
||||
#include "webrtc/modules/interface/module_common_types.h"
|
||||
#include "webrtc/system_wrappers/interface/compile_assert.h"
|
||||
#include "webrtc/system_wrappers/interface/logging.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
||||
// Lowest the microphone level can be lowered due to clipping.
|
||||
const int kClippedLevelMin = 170;
|
||||
// Amount the microphone level is lowered with every clipping event.
|
||||
const int kClippedLevelStep = 15;
|
||||
// Proportion of clipped samples required to declare a clipping event.
|
||||
const float kClippedRatioThreshold = 0.1f;
|
||||
// Time in frames to wait after a clipping event before checking again.
|
||||
const int kClippedWaitFrames = 300;
|
||||
|
||||
// Amount of error we tolerate in the microphone level (presumably due to OS
|
||||
// quantization) before we assume the user has manually adjusted the microphone.
|
||||
const int kLevelQuantizationSlack = 25;
|
||||
|
||||
const int kDefaultCompressionGain = 7;
|
||||
const int kMaxCompressionGain = 12;
|
||||
const int kMinCompressionGain = 2;
|
||||
// Controls the rate of compression changes towards the target.
|
||||
const float kCompressionGainStep = 0.05f;
|
||||
|
||||
const int kMaxMicLevel = 255;
|
||||
COMPILE_ASSERT(kGainMapSize > kMaxMicLevel, gain_map_too_small);
|
||||
const int kMinMicLevel = 12;
|
||||
const int kMinInitMicLevel = 85;
|
||||
|
||||
// Prevent very large microphone level changes.
|
||||
const int kMaxResidualGainChange = 15;
|
||||
|
||||
// Maximum additional gain allowed to compensate for microphone level
|
||||
// restrictions from clipping events.
|
||||
const int kSurplusCompressionGain = 6;
|
||||
|
||||
int LevelFromGainError(int gain_error, int level) {
|
||||
assert(level >= 0 && level <= kMaxMicLevel);
|
||||
if (gain_error == 0) {
|
||||
return level;
|
||||
}
|
||||
// TODO(ajm): Could be made more efficient with a binary search.
|
||||
int new_level = level;
|
||||
if (gain_error > 0) {
|
||||
while (kGainMap[new_level] - kGainMap[level] < gain_error &&
|
||||
new_level < kMaxMicLevel) {
|
||||
++new_level;
|
||||
}
|
||||
} else {
|
||||
while (kGainMap[new_level] - kGainMap[level] > gain_error &&
|
||||
new_level > kMinMicLevel) {
|
||||
--new_level;
|
||||
}
|
||||
}
|
||||
return new_level;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Facility for dumping debug audio files. All methods are no-ops in the
|
||||
// default case where WEBRTC_AGC_DEBUG_DUMP is undefined.
|
||||
// Facility for dumping debug audio files. All methods are no-ops in the
// default case where WEBRTC_AGC_DEBUG_DUMP is undefined.
//
// When dumping is enabled, failure to open the file is flagged by an assert
// in debug builds; in builds where asserts are compiled out the object simply
// degrades to a no-op instead of handing a null FILE* to the C library.
class DebugFile {
#ifdef WEBRTC_AGC_DEBUG_DUMP
 public:
  // Opens |filename| for binary writing, truncating any existing file.
  explicit DebugFile(const char* filename)
      : file_(fopen(filename, "wb")) {
    assert(file_);
  }
  ~DebugFile() {
    // fclose() on a null stream is undefined, so only close what was opened.
    if (file_) {
      fclose(file_);
    }
  }
  // Appends |length_samples| 16-bit samples from |data| to the dump file.
  // Does nothing if the file could not be opened.
  void Write(const int16_t* data, int length_samples) {
    if (!file_) {
      return;
    }
    fwrite(data, 1, length_samples * sizeof(int16_t), file_);
  }
 private:
  // Not copyable: a copy would close the same FILE* twice. Declared but
  // intentionally left undefined.
  DebugFile(const DebugFile&);
  void operator=(const DebugFile&);

  FILE* file_;
#else
 public:
  explicit DebugFile(const char* filename) {
  }
  ~DebugFile() {
  }
  void Write(const int16_t* data, int length_samples) {
  }
#endif  // WEBRTC_AGC_DEBUG_DUMP
};
|
||||
|
||||
// Builds a manager that allocates its own Agc. |gctrl| and |volume_callbacks|
// are stored as given. All adaptation state starts at its default and the
// microphone volume is scheduled to be checked on the first Process() call.
AgcManagerDirect::AgcManagerDirect(GainControl* gctrl,
                                   VolumeCallbacks* volume_callbacks)
    // Collaborators.
    : agc_(new Agc()),
      gctrl_(gctrl),
      volume_callbacks_(volume_callbacks),
      // Start with the clipping wait period already elapsed.
      frames_since_clipped_(kClippedWaitFrames),
      // Microphone level state.
      level_(0),
      max_level_(kMaxMicLevel),
      // Compressor state; each member is seeded from the one before it.
      max_compression_gain_(kMaxCompressionGain),
      target_compression_(kDefaultCompressionGain),
      compression_(target_compression_),
      compression_accumulator_(compression_),
      // Flags.
      capture_muted_(false),
      check_volume_on_next_process_(true),  // Check at startup.
      startup_(true),
      // Debug dumps (no-ops unless WEBRTC_AGC_DEBUG_DUMP is defined).
      file_preproc_(new DebugFile("agc_preproc.pcm")),
      file_postproc_(new DebugFile("agc_postproc.pcm")) {
}
|
||||
|
||||
// Builds a manager around a caller-supplied |agc|, which is stored in the
// |agc_| scoped_ptr member. Apart from that, initialization is identical to
// the two-argument constructor.
AgcManagerDirect::AgcManagerDirect(Agc* agc,
                                   GainControl* gctrl,
                                   VolumeCallbacks* volume_callbacks)
    // Collaborators.
    : agc_(agc),
      gctrl_(gctrl),
      volume_callbacks_(volume_callbacks),
      // Start with the clipping wait period already elapsed.
      frames_since_clipped_(kClippedWaitFrames),
      // Microphone level state.
      level_(0),
      max_level_(kMaxMicLevel),
      // Compressor state; each member is seeded from the one before it.
      max_compression_gain_(kMaxCompressionGain),
      target_compression_(kDefaultCompressionGain),
      compression_(target_compression_),
      compression_accumulator_(compression_),
      // Flags.
      capture_muted_(false),
      check_volume_on_next_process_(true),  // Check at startup.
      startup_(true),
      // Debug dumps (no-ops unless WEBRTC_AGC_DEBUG_DUMP is defined).
      file_preproc_(new DebugFile("agc_preproc.pcm")),
      file_postproc_(new DebugFile("agc_postproc.pcm")) {
}
|
||||
|
||||
// Nothing to release by hand; the destructor body is intentionally empty.
AgcManagerDirect::~AgcManagerDirect() {
}
|
||||
|
||||
int AgcManagerDirect::Initialize() {
|
||||
max_level_ = kMaxMicLevel;
|
||||
max_compression_gain_ = kMaxCompressionGain;
|
||||
target_compression_ = kDefaultCompressionGain;
|
||||
compression_ = target_compression_;
|
||||
compression_accumulator_ = compression_;
|
||||
capture_muted_ = false;
|
||||
check_volume_on_next_process_ = true;
|
||||
// TODO(bjornv): Investigate if we need to reset |startup_| as well. For
|
||||
// example, what happens when we change devices.
|
||||
|
||||
if (gctrl_->set_mode(GainControl::kFixedDigital) != 0) {
|
||||
LOG_FERR1(LS_ERROR, set_mode, GainControl::kFixedDigital);
|
||||
return -1;
|
||||
}
|
||||
if (gctrl_->set_target_level_dbfs(2) != 0) {
|
||||
LOG_FERR1(LS_ERROR, set_target_level_dbfs, 2);
|
||||
return -1;
|
||||
}
|
||||
if (gctrl_->set_compression_gain_db(kDefaultCompressionGain) != 0) {
|
||||
LOG_FERR1(LS_ERROR, set_compression_gain_db, kDefaultCompressionGain);
|
||||
return -1;
|
||||
}
|
||||
if (gctrl_->enable_limiter(true) != 0) {
|
||||
LOG_FERR1(LS_ERROR, enable_limiter, true);
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void AgcManagerDirect::AnalyzePreProcess(int16_t* audio,
|
||||
int num_channels,
|
||||
int samples_per_channel) {
|
||||
int length = num_channels * samples_per_channel;
|
||||
if (capture_muted_) {
|
||||
return;
|
||||
}
|
||||
|
||||
file_preproc_->Write(audio, length);
|
||||
|
||||
if (frames_since_clipped_ < kClippedWaitFrames) {
|
||||
++frames_since_clipped_;
|
||||
return;
|
||||
}
|
||||
|
||||
// Check for clipped samples, as the AGC has difficulty detecting pitch
|
||||
// under clipping distortion. We do this in the preprocessing phase in order
|
||||
// to catch clipped echo as well.
|
||||
//
|
||||
// If we find a sufficiently clipped frame, drop the current microphone level
|
||||
// and enforce a new maximum level, dropped the same amount from the current
|
||||
// maximum. This harsh treatment is an effort to avoid repeated clipped echo
|
||||
// events. As compensation for this restriction, the maximum compression
|
||||
// gain is increased, through SetMaxLevel().
|
||||
float clipped_ratio = agc_->AnalyzePreproc(audio, length);
|
||||
if (clipped_ratio > kClippedRatioThreshold) {
|
||||
LOG(LS_INFO) << "[agc] Clipping detected. clipped_ratio="
|
||||
<< clipped_ratio;
|
||||
// Always decrease the maximum level, even if the current level is below
|
||||
// threshold.
|
||||
SetMaxLevel(std::max(kClippedLevelMin, max_level_ - kClippedLevelStep));
|
||||
if (level_ > kClippedLevelMin) {
|
||||
// Don't try to adjust the level if we're already below the limit. As
|
||||
// a consequence, if the user has brought the level above the limit, we
|
||||
// will still not react until the postproc updates the level.
|
||||
SetLevel(std::max(kClippedLevelMin, level_ - kClippedLevelStep));
|
||||
// Reset the AGC since the level has changed.
|
||||
agc_->Reset();
|
||||
}
|
||||
frames_since_clipped_ = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void AgcManagerDirect::Process(const int16_t* audio,
|
||||
int length,
|
||||
int sample_rate_hz) {
|
||||
if (capture_muted_) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (check_volume_on_next_process_) {
|
||||
check_volume_on_next_process_ = false;
|
||||
// We have to wait until the first process call to check the volume,
|
||||
// because Chromium doesn't guarantee it to be valid any earlier.
|
||||
CheckVolumeAndReset();
|
||||
}
|
||||
|
||||
if (agc_->Process(audio, length, sample_rate_hz) != 0) {
|
||||
LOG_FERR0(LS_ERROR, Agc::Process);
|
||||
assert(false);
|
||||
}
|
||||
|
||||
UpdateGain();
|
||||
UpdateCompressor();
|
||||
|
||||
file_postproc_->Write(audio, length);
|
||||
}
|
||||
|
||||
void AgcManagerDirect::SetLevel(int new_level) {
|
||||
int voe_level = volume_callbacks_->GetMicVolume();
|
||||
if (voe_level < 0) {
|
||||
return;
|
||||
}
|
||||
if (voe_level == 0) {
|
||||
LOG(LS_INFO) << "[agc] VolumeCallbacks returned level=0, taking no action.";
|
||||
return;
|
||||
}
|
||||
if (voe_level > kMaxMicLevel) {
|
||||
LOG(LS_ERROR) << "VolumeCallbacks returned an invalid level=" << voe_level;
|
||||
return;
|
||||
}
|
||||
|
||||
if (voe_level > level_ + kLevelQuantizationSlack ||
|
||||
voe_level < level_ - kLevelQuantizationSlack) {
|
||||
LOG(LS_INFO) << "[agc] Mic volume was manually adjusted. Updating "
|
||||
<< "stored level from " << level_ << " to " << voe_level;
|
||||
level_ = voe_level;
|
||||
// Always allow the user to increase the volume.
|
||||
if (level_ > max_level_) {
|
||||
SetMaxLevel(level_);
|
||||
}
|
||||
// Take no action in this case, since we can't be sure when the volume
|
||||
// was manually adjusted. The compressor will still provide some of the
|
||||
// desired gain change.
|
||||
agc_->Reset();
|
||||
return;
|
||||
}
|
||||
|
||||
new_level = std::min(new_level, max_level_);
|
||||
if (new_level == level_) {
|
||||
return;
|
||||
}
|
||||
|
||||
volume_callbacks_->SetMicVolume(new_level);
|
||||
LOG(LS_INFO) << "[agc] voe_level=" << voe_level << ", "
|
||||
<< "level_=" << level_ << ", "
|
||||
<< "new_level=" << new_level;
|
||||
level_ = new_level;
|
||||
}
|
||||
|
||||
// Stores |level| as the new maximum microphone level and recomputes the
// maximum compression gain to compensate for the restricted range. |level|
// must be at least kClippedLevelMin.
void AgcManagerDirect::SetMaxLevel(int level) {
  assert(level >= kClippedLevelMin);
  max_level_ = level;

  // Scale the |kSurplusCompressionGain| linearly across the restricted
  // level range, rounding to the nearest integer.
  const float restricted_fraction =
      (1.f * kMaxMicLevel - max_level_) / (kMaxMicLevel - kClippedLevelMin);
  const float surplus_gain =
      std::floor(restricted_fraction * kSurplusCompressionGain + 0.5f);
  max_compression_gain_ = kMaxCompressionGain + surplus_gain;

  LOG(LS_INFO) << "[agc] max_level_=" << max_level_
               << ", max_compression_gain_=" << max_compression_gain_;
}
|
||||
|
||||
void AgcManagerDirect::SetCaptureMuted(bool muted) {
|
||||
if (capture_muted_ == muted) {
|
||||
return;
|
||||
}
|
||||
capture_muted_ = muted;
|
||||
|
||||
if (!muted) {
|
||||
// When we unmute, we should reset things to be safe.
|
||||
check_volume_on_next_process_ = true;
|
||||
}
|
||||
}
|
||||
|
||||
float AgcManagerDirect::voice_probability() {
|
||||
return static_cast<float>(agc_->voice_probability());
|
||||
}
|
||||
|
||||
int AgcManagerDirect::CheckVolumeAndReset() {
|
||||
int level = volume_callbacks_->GetMicVolume();
|
||||
if (level < 0) {
|
||||
return -1;
|
||||
}
|
||||
// Reasons for taking action at startup:
|
||||
// 1) A person starting a call is expected to be heard.
|
||||
// 2) Independent of interpretation of |level| == 0 we should raise it so the
|
||||
// AGC can do its job properly.
|
||||
if (level == 0 && !startup_) {
|
||||
LOG(LS_INFO) << "[agc] VolumeCallbacks returned level=0, taking no action.";
|
||||
return 0;
|
||||
}
|
||||
if (level > kMaxMicLevel) {
|
||||
LOG(LS_ERROR) << "VolumeCallbacks returned an invalid level=" << level;
|
||||
return -1;
|
||||
}
|
||||
LOG(LS_INFO) << "[agc] Initial GetMicVolume()=" << level;
|
||||
|
||||
int minLevel = startup_ ? kMinInitMicLevel : kMinMicLevel;
|
||||
if (level < minLevel) {
|
||||
level = minLevel;
|
||||
LOG(LS_INFO) << "[agc] Initial volume too low, raising to " << level;
|
||||
volume_callbacks_->SetMicVolume(level);
|
||||
}
|
||||
agc_->Reset();
|
||||
level_ = level;
|
||||
startup_ = false;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Requests the RMS error from AGC and distributes the required gain change
|
||||
// between the digital compression stage and volume slider. We use the
|
||||
// compressor first, providing a slack region around the current slider
|
||||
// position to reduce movement.
|
||||
//
|
||||
// If the slider needs to be moved, we check first if the user has adjusted
|
||||
// it, in which case we take no action and cache the updated level.
|
||||
void AgcManagerDirect::UpdateGain() {
|
||||
int rms_error = 0;
|
||||
if (!agc_->GetRmsErrorDb(&rms_error)) {
|
||||
// No error update ready.
|
||||
return;
|
||||
}
|
||||
// The compressor will always add at least kMinCompressionGain. In effect,
|
||||
// this adjusts our target gain upward by the same amount and rms_error
|
||||
// needs to reflect that.
|
||||
rms_error += kMinCompressionGain;
|
||||
|
||||
// Handle as much error as possible with the compressor first.
|
||||
int raw_compression = std::max(std::min(rms_error, max_compression_gain_),
|
||||
kMinCompressionGain);
|
||||
// Deemphasize the compression gain error. Move halfway between the current
|
||||
// target and the newly received target. This serves to soften perceptible
|
||||
// intra-talkspurt adjustments, at the cost of some adaptation speed.
|
||||
if ((raw_compression == max_compression_gain_ &&
|
||||
target_compression_ == max_compression_gain_ - 1) ||
|
||||
(raw_compression == kMinCompressionGain &&
|
||||
target_compression_ == kMinCompressionGain + 1)) {
|
||||
// Special case to allow the target to reach the endpoints of the
|
||||
// compression range. The deemphasis would otherwise halt it at 1 dB shy.
|
||||
target_compression_ = raw_compression;
|
||||
} else {
|
||||
target_compression_ = (raw_compression - target_compression_) / 2
|
||||
+ target_compression_;
|
||||
}
|
||||
|
||||
// Residual error will be handled by adjusting the volume slider. Use the
|
||||
// raw rather than deemphasized compression here as we would otherwise
|
||||
// shrink the amount of slack the compressor provides.
|
||||
int residual_gain = rms_error - raw_compression;
|
||||
residual_gain = std::min(std::max(residual_gain, -kMaxResidualGainChange),
|
||||
kMaxResidualGainChange);
|
||||
LOG(LS_INFO) << "[agc] rms_error=" << rms_error << ", "
|
||||
<< "target_compression=" << target_compression_ << ", "
|
||||
<< "residual_gain=" << residual_gain;
|
||||
if (residual_gain == 0)
|
||||
return;
|
||||
|
||||
SetLevel(LevelFromGainError(residual_gain, level_));
|
||||
}
|
||||
|
||||
void AgcManagerDirect::UpdateCompressor() {
|
||||
if (compression_ == target_compression_) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Adapt the compression gain slowly towards the target, in order to avoid
|
||||
// highly perceptible changes.
|
||||
if (target_compression_ > compression_) {
|
||||
compression_accumulator_ += kCompressionGainStep;
|
||||
} else {
|
||||
compression_accumulator_ -= kCompressionGainStep;
|
||||
}
|
||||
|
||||
// The compressor accepts integer gains in dB. Adjust the gain when
|
||||
// we've come within half a stepsize of the nearest integer. (We don't
|
||||
// check for equality due to potential floating point imprecision).
|
||||
int new_compression = compression_;
|
||||
int nearest_neighbor = std::floor(compression_accumulator_ + 0.5);
|
||||
if (std::fabs(compression_accumulator_ - nearest_neighbor) <
|
||||
kCompressionGainStep / 2) {
|
||||
new_compression = nearest_neighbor;
|
||||
}
|
||||
|
||||
// Set the new compression gain.
|
||||
if (new_compression != compression_) {
|
||||
compression_ = new_compression;
|
||||
compression_accumulator_ = new_compression;
|
||||
if (gctrl_->set_compression_gain_db(compression_) != 0) {
|
||||
LOG_FERR1(LS_ERROR, set_compression_gain_db, compression_);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
98
webrtc/modules/audio_processing/agc/agc_manager_direct.h
Normal file
98
webrtc/modules/audio_processing/agc/agc_manager_direct.h
Normal file
@ -0,0 +1,98 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_
|
||||
|
||||
#include "webrtc/modules/audio_processing/agc/agc.h"
|
||||
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class AudioFrame;
|
||||
class DebugFile;
|
||||
class GainControl;
|
||||
|
||||
// Interface through which AgcManagerDirect reads and writes the microphone
// volume. Implementations are injected into the manager and behave differently
// depending on whether they are called from AgcManager or AudioProcessing;
// routing volume access through this interface keeps AgcManagerDirect free of
// a VoiceEngine dependency.
class VolumeCallbacks {
 public:
  virtual ~VolumeCallbacks() {}

  // Requests that the microphone volume be set to |volume|.
  virtual void SetMicVolume(int volume) = 0;

  // Returns the current microphone volume.
  virtual int GetMicVolume() = 0;
};
|
||||
|
||||
// Direct interface to use AGC to set volume and compression values.
|
||||
// AudioProcessing uses this interface directly to integrate the callback-less
|
||||
// AGC. AgcManager delegates most of its calls here. See agc_manager.h for
|
||||
// undocumented methods.
|
||||
//
|
||||
// This class is not thread-safe.
|
||||
class AgcManagerDirect {
|
||||
public:
|
||||
// AgcManagerDirect will configure GainControl internally. The user is
|
||||
// responsible for processing the audio using it after the call to Process.
|
||||
AgcManagerDirect(GainControl* gctrl, VolumeCallbacks* volume_callbacks);
|
||||
// Dependency injection for testing. Don't delete |agc| as the memory is owned
|
||||
// by the manager.
|
||||
AgcManagerDirect(Agc* agc,
|
||||
GainControl* gctrl,
|
||||
VolumeCallbacks* volume_callbacks);
|
||||
~AgcManagerDirect();
|
||||
|
||||
int Initialize();
|
||||
void AnalyzePreProcess(int16_t* audio,
|
||||
int num_channels,
|
||||
int samples_per_channel);
|
||||
void Process(const int16_t* audio, int length, int sample_rate_hz);
|
||||
|
||||
// Sets a new microphone level, after first checking that it hasn't been
|
||||
// updated by the user, in which case no action is taken.
|
||||
void SetLevel(int new_level);
|
||||
|
||||
// Set the maximum level the AGC is allowed to apply. Also updates the
|
||||
// maximum compression gain to compensate. The level must be at least
|
||||
// |kClippedLevelMin|.
|
||||
void SetMaxLevel(int level);
|
||||
|
||||
void SetCaptureMuted(bool muted);
|
||||
bool capture_muted() { return capture_muted_; }
|
||||
|
||||
float voice_probability();
|
||||
|
||||
private:
|
||||
int CheckVolumeAndReset();
|
||||
void UpdateGain();
|
||||
void UpdateCompressor();
|
||||
|
||||
scoped_ptr<Agc> agc_;
|
||||
GainControl* gctrl_;
|
||||
VolumeCallbacks* volume_callbacks_;
|
||||
|
||||
int frames_since_clipped_;
|
||||
int level_;
|
||||
int max_level_;
|
||||
int max_compression_gain_;
|
||||
int target_compression_;
|
||||
int compression_;
|
||||
float compression_accumulator_;
|
||||
bool capture_muted_;
|
||||
bool check_volume_on_next_process_;
|
||||
bool startup_;
|
||||
|
||||
scoped_ptr<DebugFile> file_preproc_;
|
||||
scoped_ptr<DebugFile> file_postproc_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_
|
162
webrtc/modules/audio_processing/agc/agc_unittest.cc
Normal file
162
webrtc/modules/audio_processing/agc/agc_unittest.cc
Normal file
@ -0,0 +1,162 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/agc/agc.h"
|
||||
|
||||
#include "gmock/gmock.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
#include "webrtc/modules/audio_processing/agc/test/test_utils.h"
|
||||
#include "webrtc/modules/interface/module_common_types.h"
|
||||
#include "webrtc/test/testsupport/fileutils.h"
|
||||
|
||||
using ::testing::_;
|
||||
using ::testing::AllOf;
|
||||
using ::testing::AtLeast;
|
||||
using ::testing::Eq;
|
||||
using ::testing::Gt;
|
||||
using ::testing::InSequence;
|
||||
using ::testing::Lt;
|
||||
using ::testing::Mock;
|
||||
using ::testing::SaveArg;
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
// The tested values depend on this assumed gain.
|
||||
const int kMaxGain = 80;
|
||||
|
||||
MATCHER_P(GtPointee, p, "") { return arg > *p; }
|
||||
MATCHER_P(LtPointee, p, "") { return arg < *p; }
|
||||
|
||||
class AgcChecker {
|
||||
public:
|
||||
MOCK_METHOD2(LevelChanged, void(int iterations, int level));
|
||||
};
|
||||
|
||||
class AgcTest : public ::testing::Test {
|
||||
protected:
|
||||
AgcTest()
|
||||
: agc_(),
|
||||
checker_(),
|
||||
mic_level_(128) {
|
||||
}
|
||||
|
||||
// A gain of <= -100 will zero out the signal.
|
||||
void RunAgc(int iterations, float gain_db) {
|
||||
FILE* input_file = fopen(
|
||||
test::ResourcePath("voice_engine/audio_long16", "pcm").c_str(), "rb");
|
||||
ASSERT_TRUE(input_file != NULL);
|
||||
|
||||
AudioFrame frame;
|
||||
frame.sample_rate_hz_ = 16000;
|
||||
frame.num_channels_ = 1;
|
||||
frame.samples_per_channel_ = frame.sample_rate_hz_ / 100;
|
||||
const size_t length = frame.samples_per_channel_ * frame.num_channels_;
|
||||
|
||||
float gain = Db2Linear(gain_db);
|
||||
if (gain_db <= -100) {
|
||||
gain = 0;
|
||||
}
|
||||
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
ASSERT_EQ(length, fread(frame.data_, sizeof(int16_t), length,
|
||||
input_file));
|
||||
SimulateMic(kMaxGain, mic_level_, &frame);
|
||||
ApplyGainLinear(gain, &frame);
|
||||
ASSERT_GE(agc_.Process(frame), 0);
|
||||
|
||||
int mic_level = agc_.MicLevel();
|
||||
if (mic_level != mic_level_) {
|
||||
printf("mic_level=%d\n", mic_level);
|
||||
checker_.LevelChanged(i, mic_level);
|
||||
}
|
||||
mic_level_ = mic_level;
|
||||
}
|
||||
fclose(input_file);
|
||||
}
|
||||
|
||||
Agc agc_;
|
||||
AgcChecker checker_;
|
||||
// Stores mic level between multiple runs of RunAgc in one test.
|
||||
int mic_level_;
|
||||
};
|
||||
|
||||
// With a strongly attenuated input, the first level change must happen before
// iteration 500 and land exactly on the expected level; at least one further
// increase must follow.
TEST_F(AgcTest, UpwardsChangeIsLimited) {
  const int kFirstStepDeadline = 500;
  const int kFirstStepLevel = 179;
  {
    InSequence seq;
    EXPECT_CALL(checker_,
                LevelChanged(Lt(kFirstStepDeadline), Eq(kFirstStepLevel)))
        .Times(1);
    EXPECT_CALL(checker_, LevelChanged(_, Gt(kFirstStepLevel)))
        .Times(AtLeast(1));
  }
  RunAgc(1000, -40);
}
|
||||
|
||||
// With a strongly amplified input, the first level change must happen before
// iteration 500 and land exactly on the expected level; at least one further
// decrease must follow.
TEST_F(AgcTest, DownwardsChangeIsLimited) {
  const int kFirstStepDeadline = 500;
  const int kFirstStepLevel = 77;
  {
    InSequence seq;
    EXPECT_CALL(checker_,
                LevelChanged(Lt(kFirstStepDeadline), Eq(kFirstStepLevel)))
        .Times(1);
    EXPECT_CALL(checker_, LevelChanged(_, Lt(kFirstStepLevel)))
        .Times(AtLeast(1));
  }
  RunAgc(1000, 40);
}
|
||||
|
||||
// A quiet input must drive the level monotonically up to 255; a loud input
// must then drive it monotonically down to 1.
TEST_F(AgcTest, MovesUpToMaxAndDownToMin) {
  int last_level = 128;

  // Phase 1: every change is an increase over the previously seen level.
  EXPECT_CALL(checker_, LevelChanged(_, GtPointee(&last_level)))
      .Times(AtLeast(2))
      .WillRepeatedly(SaveArg<1>(&last_level));
  RunAgc(1000, -30);
  EXPECT_EQ(255, last_level);
  Mock::VerifyAndClearExpectations(&checker_);

  // Phase 2: every change is a decrease from the previously seen level.
  EXPECT_CALL(checker_, LevelChanged(_, LtPointee(&last_level)))
      .Times(AtLeast(2))
      .WillRepeatedly(SaveArg<1>(&last_level));
  RunAgc(1000, 50);
  EXPECT_EQ(1, last_level);
}
|
||||
|
||||
// An all-zero input must not move the level; once real signal returns the AGC
// must adapt again.
TEST_F(AgcTest, HandlesZeroSignal) {
  int last_level = 128;

  // Doesn't respond to a zero signal.
  EXPECT_CALL(checker_, LevelChanged(_, _))
      .Times(0);
  RunAgc(1000, -100);
  Mock::VerifyAndClearExpectations(&checker_);

  // Reacts as usual afterwards.
  EXPECT_CALL(checker_, LevelChanged(_, GtPointee(&last_level)))
      .Times(AtLeast(2))
      .WillRepeatedly(SaveArg<1>(&last_level));
  RunAgc(500, -20);
}
|
||||
|
||||
// Lets the AGC adapt to a -20 dB signal, remembers the last reported level,
// then repeats the same run and requires any further change to stay strictly
// within 5% of that remembered level.
TEST_F(AgcTest, ReachesSteadyState) {
  int adapted_level = 128;
  EXPECT_CALL(checker_, LevelChanged(_, _))
      .Times(AtLeast(2))
      .WillRepeatedly(SaveArg<1>(&adapted_level));
  RunAgc(1000, -20);
  Mock::VerifyAndClearExpectations(&checker_);

  // The band is computed once, from the level reached by the first run.
  const double lower_bound = adapted_level * 0.95;
  const double upper_bound = adapted_level * 1.05;
  EXPECT_CALL(checker_,
              LevelChanged(_, AllOf(Gt(lower_bound), Lt(upper_bound))))
      .Times(AtLeast(0));
  RunAgc(1000, -20);
}
|
||||
|
||||
// TODO(ajm): Add this test; requires measuring the signal RMS.
|
||||
TEST_F(AgcTest, AdaptsToCorrectRMS) {
|
||||
}
|
||||
|
||||
} // namespace
|
||||
} // namespace webrtc
|
||||
|
136
webrtc/modules/audio_processing/agc/circular_buffer.cc
Normal file
136
webrtc/modules/audio_processing/agc/circular_buffer.cc
Normal file
@ -0,0 +1,136 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/agc/circular_buffer.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Allocates storage for |buffer_size| values and starts with an empty buffer.
// The storage is value-initialized (zero-filled): Oldest() reads buffer_[0]
// while nothing has been inserted yet, and must not return an indeterminate
// value in that case.
AgcCircularBuffer::AgcCircularBuffer(int buffer_size)
    : buffer_(new double[buffer_size]()),
      is_full_(false),
      index_(0),
      buffer_size_(buffer_size),
      sum_(0) {}

// Nothing to do explicitly; the storage is owned by |buffer_|.
AgcCircularBuffer::~AgcCircularBuffer() {}
|
||||
|
||||
// Returns the buffer to its empty state. The stored samples are left in
// place; only the running sum, the write position and the full flag are
// reset.
void AgcCircularBuffer::Reset() {
  sum_ = 0;
  index_ = 0;
  is_full_ = false;
}
|
||||
|
||||
// Factory. Returns a new heap-allocated buffer holding |buffer_size| values,
// or NULL when |buffer_size| is not positive.
AgcCircularBuffer* AgcCircularBuffer::Create(int buffer_size) {
  if (buffer_size > 0)
    return new AgcCircularBuffer(buffer_size);
  return NULL;
}
|
||||
|
||||
double AgcCircularBuffer::Oldest() const {
|
||||
if (!is_full_)
|
||||
return buffer_[0];
|
||||
else
|
||||
return buffer_[index_];
|
||||
}
|
||||
|
||||
double AgcCircularBuffer::Mean() {
|
||||
double m;
|
||||
if (is_full_) {
|
||||
m = sum_ / buffer_size_;
|
||||
} else {
|
||||
if (index_ > 0)
|
||||
m = sum_ / index_;
|
||||
else
|
||||
m = 0;
|
||||
}
|
||||
return m;
|
||||
}
|
||||
|
||||
void AgcCircularBuffer::Insert(double value) {
|
||||
if (is_full_) {
|
||||
sum_ -= buffer_[index_];
|
||||
}
|
||||
sum_ += value;
|
||||
buffer_[index_] = value;
|
||||
index_++;
|
||||
if (index_ >= buffer_size_) {
|
||||
is_full_ = true;
|
||||
index_ = 0;
|
||||
}
|
||||
}
|
||||
int AgcCircularBuffer::BufferLevel() {
|
||||
if (is_full_)
|
||||
return buffer_size_;
|
||||
return index_;
|
||||
}
|
||||
|
||||
int AgcCircularBuffer::Get(int index, double* value) const {
|
||||
int err = ConvertToLinearIndex(&index);
|
||||
if (err < 0)
|
||||
return -1;
|
||||
*value = buffer_[index];
|
||||
return 0;
|
||||
}
|
||||
|
||||
int AgcCircularBuffer::Set(int index, double value) {
|
||||
int err = ConvertToLinearIndex(&index);
|
||||
if (err < 0)
|
||||
return -1;
|
||||
|
||||
sum_ -= buffer_[index];
|
||||
buffer_[index] = value;
|
||||
sum_ += value;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int AgcCircularBuffer::ConvertToLinearIndex(int* index) const {
|
||||
if (*index < 0 || *index >= buffer_size_)
|
||||
return -1;
|
||||
|
||||
if (!is_full_ && *index >= index_)
|
||||
return -1;
|
||||
|
||||
*index = index_ - 1 - *index;
|
||||
if (*index < 0)
|
||||
*index += buffer_size_;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int AgcCircularBuffer::RemoveTransient(int width_threshold,
|
||||
double val_threshold) {
|
||||
if (!is_full_ && index_ < width_threshold + 2)
|
||||
return 0;
|
||||
|
||||
int index_1 = 0;
|
||||
int index_2 = width_threshold + 1;
|
||||
double v = 0;
|
||||
if (Get(index_1, &v) < 0)
|
||||
return -1;
|
||||
if (v < val_threshold) {
|
||||
Set(index_1, 0);
|
||||
int index;
|
||||
for (index = index_2; index > index_1; index--) {
|
||||
if (Get(index, &v) < 0)
|
||||
return -1;
|
||||
if (v < val_threshold)
|
||||
break;
|
||||
}
|
||||
for (; index > index_1; index--) {
|
||||
if (Set(index, 0.0) < 0)
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
69
webrtc/modules/audio_processing/agc/circular_buffer.h
Normal file
69
webrtc/modules/audio_processing/agc/circular_buffer.h
Normal file
@ -0,0 +1,69 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_CIRCULAR_BUFFER_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_CIRCULAR_BUFFER_H_
|
||||
|
||||
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// A circular buffer tailored to the need of this project. It stores last
|
||||
// K samples of the input, and keeps track of the mean of the last samples.
|
||||
//
|
||||
// It is used in class "PitchBasedActivity" to keep track of posterior
|
||||
// probabilities in the past few seconds. The posterior probabilities are used
|
||||
// to recursively update prior probabilities.
|
||||
class AgcCircularBuffer {
 public:
  // Creates a buffer holding |buffer_size| values. Returns NULL if
  // |buffer_size| is not positive. The object is allocated with new and
  // returned as a raw pointer.
  static AgcCircularBuffer* Create(int buffer_size);
  ~AgcCircularBuffer();

  // If buffer is wrapped around.
  bool is_full() const { return is_full_; }
  // Get the oldest entry in the buffer.
  double Oldest() const;
  // Insert new value into the buffer.
  void Insert(double value);
  // Reset buffer, forget the past, start fresh.
  void Reset();

  // The mean value of the elements in the buffer. The return value is zero if
  // buffer is empty, i.e. no value is inserted.
  double Mean();
  // Remove transients. If the values exceed |val_threshold| for a period
  // shorter than or equal to |width_threshold|, then that period is considered
  // transient and set to zero. Returns 0 on success and -1 if a buffer access
  // fails.
  int RemoveTransient(int width_threshold, double val_threshold);

 private:
  explicit AgcCircularBuffer(int buffer_size);
  // Get previous values. |index = 0| corresponds to the most recent
  // insertion. |index = 1| is the one before the most recent insertion, and
  // so on. Returns 0 on success and -1 if |index| is out of range.
  int Get(int index, double* value) const;
  // Set a given position to |value|. |index| is interpreted as above.
  int Set(int index, double value);
  // Return the number of valid elements in the buffer.
  int BufferLevel();

  // Convert an index with the interpretation as get() method to the
  // corresponding linear index.
  int ConvertToLinearIndex(int* index) const;

  // Storage for the values.
  scoped_ptr<double[]> buffer_;
  // True once the write position has wrapped around at least once.
  bool is_full_;
  // Position in |buffer_| where the next value is written.
  int index_;
  // Capacity of |buffer_|.
  int buffer_size_;
  // Running sum of the stored values, used by Mean().
  double sum_;
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_CIRCULAR_BUFFER_H_
|
132
webrtc/modules/audio_processing/agc/circular_buffer_unittest.cc
Normal file
132
webrtc/modules/audio_processing/agc/circular_buffer_unittest.cc
Normal file
@ -0,0 +1,132 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/agc/circular_buffer.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "testing/gtest/include/gtest/gtest.h"
|
||||
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
static const int kWidthThreshold = 7;
|
||||
static const double kValThreshold = 1.0;
|
||||
static const int kLongBuffSize = 100;
|
||||
static const int kShortBuffSize = 10;
|
||||
|
||||
static void InsertSequentially(int k, AgcCircularBuffer* circular_buffer) {
|
||||
double mean_val;
|
||||
for (int n = 1; n <= k; n++) {
|
||||
EXPECT_TRUE(!circular_buffer->is_full());
|
||||
circular_buffer->Insert(n);
|
||||
mean_val = circular_buffer->Mean();
|
||||
EXPECT_EQ((n + 1.0) / 2., mean_val);
|
||||
}
|
||||
}
|
||||
|
||||
static void Insert(double value, int num_insertion,
|
||||
AgcCircularBuffer* circular_buffer) {
|
||||
for (int n = 0; n < num_insertion; n++)
|
||||
circular_buffer->Insert(value);
|
||||
}
|
||||
|
||||
static void InsertZeros(int num_zeros, AgcCircularBuffer* circular_buffer) {
|
||||
Insert(0.0, num_zeros, circular_buffer);
|
||||
}
|
||||
|
||||
// Checks the mean and the full flag while filling the buffer, after it has
// wrapped, and again after a reset.
TEST(AgcCircularBufferTest, GeneralTest) {
  scoped_ptr<AgcCircularBuffer> buffer(
      AgcCircularBuffer::Create(kShortBuffSize));

  // Mean should return zero if nothing is inserted.
  EXPECT_DOUBLE_EQ(0.0, buffer->Mean());
  InsertSequentially(kShortBuffSize, buffer.get());

  // Should be full.
  EXPECT_TRUE(buffer->is_full());

  // Correct update after being full: each n overwrites the slot that already
  // holds n, so the mean stays at the mean of 1..kShortBuffSize.
  const double kFullMean = (kShortBuffSize + 1.) / 2.;
  for (int n = 1; n < kShortBuffSize; n++) {
    buffer->Insert(n);
    EXPECT_DOUBLE_EQ(kFullMean, buffer->Mean());
    EXPECT_TRUE(buffer->is_full());
  }

  // Check reset. This should be like starting fresh.
  buffer->Reset();
  EXPECT_DOUBLE_EQ(0, buffer->Mean());
  InsertSequentially(kShortBuffSize, buffer.get());
  EXPECT_TRUE(buffer->is_full());
}
|
||||
|
||||
// Inserts bursts of kWidthThreshold, kWidthThreshold - 1, ..., 1 values equal
// to the threshold, each followed by a zero, and checks that RemoveTransient
// succeeds and clears every burst so the mean returns to zero.
TEST(AgcCircularBufferTest, TransientsRemoval) {
  scoped_ptr<AgcCircularBuffer> circular_buffer(
      AgcCircularBuffer::Create(kLongBuffSize));
  // Let the first transient be in wrap-around.
  InsertZeros(kLongBuffSize - kWidthThreshold / 2, circular_buffer.get());

  const double push_val = kValThreshold;
  for (int k = kWidthThreshold; k >= 1; k--) {
    Insert(push_val, k, circular_buffer.get());
    circular_buffer->Insert(0);
    EXPECT_DOUBLE_EQ(k * push_val / kLongBuffSize, circular_buffer->Mean());
    // The status is checked as well, as in the TransientDetection test, so a
    // failed buffer access cannot go unnoticed.
    EXPECT_EQ(0, circular_buffer->RemoveTransient(kWidthThreshold,
                                                  kValThreshold));
    EXPECT_DOUBLE_EQ(0, circular_buffer->Mean());
  }
}
|
||||
|
||||
// Checks which bursts RemoveTransient treats as transients: a burst longer
// than kWidthThreshold is kept, a short burst followed by a zero is removed,
// and a short burst that is still ongoing is kept.
TEST(AgcCircularBufferTest, TransientDetection) {
  scoped_ptr<AgcCircularBuffer> buffer(
      AgcCircularBuffer::Create(kLongBuffSize));
  // Let the first transient be in wrap-around.
  InsertZeros(kLongBuffSize - kWidthThreshold / 2, buffer.get());

  const double push_val = 2;
  const int kShortBurst = 3;
  // This is longer than a transient and shouldn't be removed.
  int num_non_zero_elements = kWidthThreshold + 1;
  Insert(push_val, num_non_zero_elements, buffer.get());
  EXPECT_DOUBLE_EQ(num_non_zero_elements * push_val / kLongBuffSize,
                   buffer->Mean());
  buffer->Insert(0);
  EXPECT_EQ(0, buffer->RemoveTransient(kWidthThreshold, kValThreshold));
  EXPECT_DOUBLE_EQ(num_non_zero_elements * push_val / kLongBuffSize,
                   buffer->Mean());

  // A transient right after a non-transient, should be removed and mean is
  // not changed.
  Insert(push_val, kShortBurst, buffer.get());
  buffer->Insert(0);
  EXPECT_EQ(0, buffer->RemoveTransient(kWidthThreshold, kValThreshold));
  EXPECT_DOUBLE_EQ(num_non_zero_elements * push_val / kLongBuffSize,
                   buffer->Mean());

  // Last input is larger than threshold, although the sequence is short but
  // it shouldn't be considered transient.
  Insert(push_val, kShortBurst, buffer.get());
  num_non_zero_elements += kShortBurst;
  EXPECT_EQ(0, buffer->RemoveTransient(kWidthThreshold, kValThreshold));
  EXPECT_DOUBLE_EQ(num_non_zero_elements * push_val / kLongBuffSize,
                   buffer->Mean());
}
|
||||
|
||||
} // namespace webrtc
|
27
webrtc/modules/audio_processing/agc/common.h
Normal file
27
webrtc/modules/audio_processing/agc/common.h
Normal file
@ -0,0 +1,27 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_COMMON_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_COMMON_H_
|
||||
|
||||
static const int kSampleRateHz = 16000;
|
||||
static const int kLength10Ms = kSampleRateHz / 100;
|
||||
static const int kMaxNumFrames = 4;
|
||||
|
||||
// Bundle of audio features. Each array has room for |kMaxNumFrames| entries.
// NOTE(review): presumably only the first |num_frames| entries of each array
// are meaningful - confirm against the code that fills this struct.
struct AudioFeatures {
  double log_pitch_gain[kMaxNumFrames];
  double pitch_lag_hz[kMaxNumFrames];
  double spectral_peak[kMaxNumFrames];
  double rms[kMaxNumFrames];
  // NOTE(review): presumably the number of valid entries in the arrays above.
  int num_frames;
  // NOTE(review): presumably set when the analysed audio is silent - confirm.
  bool silence;
};
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_COMMON_H_
|
275
webrtc/modules/audio_processing/agc/gain_map_internal.h
Normal file
275
webrtc/modules/audio_processing/agc/gain_map_internal.h
Normal file
@ -0,0 +1,275 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_GAIN_MAP_INTERNAL_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_GAIN_MAP_INTERNAL_H_
|
||||
|
||||
static const int kGainMapSize = 256;
// Uses parameters: si = 2, sf = 0.25, D = 8/256
// The table has one entry per index in [0, kGainMapSize) and its values are
// non-decreasing from -56 to 64. Twelve entries per row; the trailing comment
// gives the index of the first entry in the row.
static const int kGainMap[kGainMapSize] = {
    -56, -54, -52, -50, -48, -47, -45, -43, -42, -40, -38, -37,  // 0
    -35, -34, -33, -31, -30, -29, -27, -26, -25, -24, -23, -22,  // 12
    -20, -19, -18, -17, -16, -15, -14, -14, -13, -12, -11, -10,  // 24
    -9, -8, -8, -7, -6, -5, -5, -4, -3, -2, -2, -1,              // 36
    0, 0, 1, 1, 2, 3, 3, 4, 4, 5, 5, 6,                          // 48
    6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12,                     // 60
    12, 13, 13, 13, 14, 14, 15, 15, 15, 16, 16, 17,              // 72
    17, 17, 18, 18, 18, 19, 19, 19, 20, 20, 21, 21,              // 84
    21, 22, 22, 22, 23, 23, 23, 24, 24, 24, 24, 25,              // 96
    25, 25, 26, 26, 26, 27, 27, 27, 28, 28, 28, 28,              // 108
    29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 32, 32,              // 120
    32, 32, 33, 33, 33, 33, 34, 34, 34, 35, 35, 35,              // 132
    35, 36, 36, 36, 36, 37, 37, 37, 38, 38, 38, 38,              // 144
    39, 39, 39, 39, 40, 40, 40, 40, 41, 41, 41, 41,              // 156
    42, 42, 42, 42, 43, 43, 43, 44, 44, 44, 44, 45,              // 168
    45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47, 48,              // 180
    48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51,              // 192
    51, 51, 51, 52, 52, 52, 52, 53, 53, 53, 53, 54,              // 204
    54, 54, 54, 55, 55, 55, 55, 56, 56, 56, 56, 57,              // 216
    57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59, 60,              // 228
    60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63,              // 240
    63, 63, 63, 64                                               // 252
};
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_GAIN_MAP_INTERNAL_H_
|
61
webrtc/modules/audio_processing/agc/gmm.cc
Normal file
61
webrtc/modules/audio_processing/agc/gmm.cc
Normal file
@ -0,0 +1,61 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/agc/gmm.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
static const int kMaxDimension = 10;
|
||||
|
||||
// Writes in[n] - mean_vec[n] to out[n] for the first |dimension| elements.
// Nothing is written when |dimension| is not positive.
static void RemoveMean(const double* in, const double* mean_vec,
                       int dimension, double* out) {
  for (int remaining = dimension; remaining > 0; --remaining)
    *out++ = *in++ - *mean_vec++;
}
|
||||
|
||||
// Returns -0.5 * in' * covar_inv * in, where |covar_inv| points to a
// |dimension| x |dimension| matrix stored row by row and |in| holds
// |dimension| elements.
static double ComputeExponent(const double* in, const double* covar_inv,
                              int dimension) {
  double quadratic_form = 0;
  for (int row = 0; row < dimension; ++row) {
    const double* covar_row = covar_inv + row * dimension;
    // Dot product of the current matrix row with the input vector.
    double row_dot = 0;
    for (int col = 0; col < dimension; ++col)
      row_dot += covar_row[col] * in[col];
    quadratic_form += row_dot * in[row];
  }
  return quadratic_form * -0.5;
}
|
||||
|
||||
double EvaluateGmm(const double* x, const GmmParameters& gmm_parameters) {
|
||||
if (gmm_parameters.dimension > kMaxDimension) {
|
||||
return -1; // This is invalid pdf so the caller can check this.
|
||||
}
|
||||
double f = 0;
|
||||
double v[kMaxDimension];
|
||||
const double* mean_vec = gmm_parameters.mean;
|
||||
const double* covar_inv = gmm_parameters.covar_inverse;
|
||||
|
||||
for (int n = 0; n < gmm_parameters.num_mixtures; n++) {
|
||||
RemoveMean(x, mean_vec, gmm_parameters.dimension, v);
|
||||
double q = ComputeExponent(v, covar_inv, gmm_parameters.dimension) +
|
||||
gmm_parameters.weight[n];
|
||||
f += exp(q);
|
||||
mean_vec += gmm_parameters.dimension;
|
||||
covar_inv += gmm_parameters.dimension * gmm_parameters.dimension;
|
||||
}
|
||||
return f;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
45
webrtc/modules/audio_processing/agc/gmm.h
Normal file
45
webrtc/modules/audio_processing/agc/gmm.h
Normal file
@ -0,0 +1,45 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_GMM_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_GMM_H_
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// A structure that specifies a GMM.
|
||||
// A GMM is formulated as
|
||||
// f(x) = w[0] * mixture[0] + w[1] * mixture[1] + ... +
|
||||
// w[num_mixtures - 1] * mixture[num_mixtures - 1];
|
||||
// Where a 'mixture' is a Gaussian density.
|
||||
|
||||
// Describes a GMM through pointers to its tables. The struct only stores the
// pointers; it does not allocate or free the tables they refer to.
struct GmmParameters {
  // weight[n] = log(w[n]) - |dimension|/2 * log(2*pi) - 1/2 * log(det(cov[n]));
  // where cov[n] is the covariance matrix of mixture n;
  const double* weight;
  // Pointer to the first element of a |num_mixtures|x|dimension| matrix
  // where kth row is the mean of the kth mixture.
  const double* mean;
  // Pointer to the first element of a |num_mixtures|x|dimension|x|dimension|
  // 3D-matrix, where the kth 2D-matrix is the inverse of the covariance
  // matrix of the kth mixture.
  const double* covar_inverse;
  // Dimensionality of the mixtures.
  int dimension;
  // Number of the mixtures.
  int num_mixtures;
};
|
||||
|
||||
// Evaluate the given GMM, according to |gmm_parameters|, at the given point
|
||||
// |x|. If the dimensionality of the given GMM is larger than the maximum
|
||||
// acceptable dimension by the following function -1 is returned.
|
||||
double EvaluateGmm(const double* x, const GmmParameters& gmm_parameters);
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_GMM_H_
|
65
webrtc/modules/audio_processing/agc/gmm_unittest.cc
Normal file
65
webrtc/modules/audio_processing/agc/gmm_unittest.cc
Normal file
@ -0,0 +1,65 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/agc/gmm.h"
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include "testing/gtest/include/gtest/gtest.h"
|
||||
#include "webrtc/modules/audio_processing/agc/noise_gmm_tables.h"
|
||||
#include "webrtc/modules/audio_processing/agc/voice_gmm_tables.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Evaluates the voice and noise GMM tables at fixed test vectors and compares
// the results with reference pdf values.
TEST(GmmTest, EvaluateGmm) {
  // Setup voice GMM.
  GmmParameters voice_gmm;
  voice_gmm.weight = kVoiceGmmWeights;
  voice_gmm.mean = &kVoiceGmmMean[0][0];
  voice_gmm.covar_inverse = &kVoiceGmmCovarInverse[0][0][0];
  voice_gmm.dimension = kVoiceGmmDim;
  voice_gmm.num_mixtures = kVoiceGmmNumMixtures;

  // Setup noise GMM.
  GmmParameters noise_gmm;
  noise_gmm.weight = kNoiseGmmWeights;
  noise_gmm.mean = &kNoiseGmmMean[0][0];
  noise_gmm.covar_inverse = &kNoiseGmmCovarInverse[0][0][0];
  noise_gmm.dimension = kNoiseGmmDim;
  noise_gmm.num_mixtures = kNoiseGmmNumMixtures;

  // Test vectors. These are the mean of the GMM means.
  const double kXVoice[kVoiceGmmDim] = {
      -1.35893162459863, 602.862491970368, 178.022069191324};
  const double kXNoise[kNoiseGmmDim] = {
      -2.33443722724409, 2827.97828765184, 141.114178166812};

  // Expected pdf values. These values are computed in MATLAB using EvalGmm.m
  const double kPdfNoise = 1.88904409403101e-07;
  const double kPdfVoice = 1.30453996982266e-06;

  // Relative error should be smaller than the following value.
  const double kAcceptedRelativeErr = 1e-10;

  // Test Voice.
  const double voice_pdf = EvaluateGmm(kXVoice, voice_gmm);
  EXPECT_GT(voice_pdf, 0);
  EXPECT_LE(fabs(voice_pdf - kPdfVoice) / kPdfVoice, kAcceptedRelativeErr);

  // Test Noise.
  const double noise_pdf = EvaluateGmm(kXNoise, noise_gmm);
  EXPECT_GT(noise_pdf, 0);
  EXPECT_LE(fabs(noise_pdf - kPdfNoise) / kPdfNoise, kAcceptedRelativeErr);
}
|
||||
|
||||
} // namespace webrtc
|
228
webrtc/modules/audio_processing/agc/histogram.cc
Normal file
228
webrtc/modules/audio_processing/agc/histogram.cc
Normal file
@ -0,0 +1,228 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/agc/histogram.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
|
||||
#include "webrtc/modules/interface/module_common_types.h"
|
||||
#include "webrtc/system_wrappers/interface/compile_assert.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Centers of the histogram bins, in increasing order. Consecutive entries
// grow by a constant factor (about 1.1875) and entry 15 is exactly 1.0.
// The number of entries is checked against kHistSize at compile time in the
// default constructor.
static const double kHistBinCenters[] = {
    7.59621091765857e-02, 9.02036021061016e-02, 1.07115112009343e-01,
    1.27197217770508e-01, 1.51044347572047e-01, 1.79362373905283e-01,
    2.12989507320644e-01, 2.52921107370304e-01, 3.00339145144454e-01,
    3.56647189489147e-01, 4.23511952494003e-01, 5.02912623991786e-01,
    5.97199455365749e-01, 7.09163326739184e-01, 8.42118356728544e-01,
    1.00000000000000e+00, 1.18748153630660e+00, 1.41011239906908e+00,
    1.67448243801153e+00, 1.98841697800836e+00, 2.36120844786349e+00,
    2.80389143520905e+00, 3.32956930911896e+00, 3.95380207843188e+00,
    4.69506696634852e+00, 5.57530533426190e+00, 6.62057214370769e+00,
    7.86180718043869e+00, 9.33575086877358e+00, 1.10860317842269e+01,
    1.31644580546776e+01, 1.56325508754123e+01, 1.85633655299256e+01,
    2.20436538184971e+01, 2.61764319021997e+01, 3.10840295702492e+01,
    3.69117111886792e+01, 4.38319755100383e+01, 5.20496616180135e+01,
    6.18080121423973e+01, 7.33958732149108e+01, 8.71562442838066e+01,
    1.03496430860848e+02, 1.22900100720889e+02, 1.45941600416277e+02,
    1.73302955873365e+02, 2.05794060286978e+02, 2.44376646872353e+02,
    2.90192756065437e+02, 3.44598539797631e+02, 4.09204403447902e+02,
    4.85922673669740e+02, 5.77024203055553e+02, 6.85205587130498e+02,
    8.13668983291589e+02, 9.66216894324125e+02, 1.14736472207740e+03,
    1.36247442287647e+03, 1.61791322085579e+03, 1.92124207711260e+03,
    2.28143949334655e+03, 2.70916727454970e+03, 3.21708611729384e+03,
    3.82023036499473e+03, 4.53645302286906e+03, 5.38695420497926e+03,
    6.39690865534207e+03, 7.59621091765857e+03, 9.02036021061016e+03,
    1.07115112009343e+04, 1.27197217770508e+04, 1.51044347572047e+04,
    1.79362373905283e+04, 2.12989507320644e+04, 2.52921107370304e+04,
    3.00339145144454e+04, 3.56647189489147e+04};
|
||||
|
||||
static const double kProbQDomain = 1024.0;
|
||||
// Loudness of -15 dB (smallest expected loudness) in log domain,
|
||||
// loudness_db = 13.5 * log10(rms);
|
||||
static const double kLogDomainMinBinCenter = -2.57752062648587;
|
||||
// Loudness step of 1 dB in log domain
|
||||
static const double kLogDomainStepSizeInverse = 5.81954605750359;
|
||||
|
||||
static const int kTransientWidthThreshold = 7;
|
||||
static const double kLowProbabilityThreshold = 0.2;
|
||||
|
||||
static const int kLowProbThresholdQ10 = static_cast<int>(
|
||||
kLowProbabilityThreshold * kProbQDomain);
|
||||
|
||||
// Builds a histogram without a circular buffer: |len_circular_buffer_| is 0,
// which makes Update() and InsertNewestEntryAndUpdate() skip all windowing.
Histogram::Histogram()
    : num_updates_(0),
      audio_content_q10_(0),
      bin_count_q10_(),
      activity_probability_(),
      hist_bin_index_(),
      buffer_index_(0),
      buffer_is_full_(false),
      len_circular_buffer_(0),
      len_high_activity_(0) {
  // The bin-center table must have exactly one entry per histogram bin.
  COMPILE_ASSERT(kHistSize == sizeof(kHistBinCenters) /
                 sizeof(kHistBinCenters[0]), histogram_bin_centers_incorrect_size);
}

// Builds a histogram with a circular buffer of |window_size| entries. Two
// arrays of that length are allocated: one for the activity probability and
// one for the bin index of each buffered update.
Histogram::Histogram(int window_size)
    : num_updates_(0),
      audio_content_q10_(0),
      bin_count_q10_(),
      activity_probability_(new int[window_size]),
      hist_bin_index_(new int[window_size]),
      buffer_index_(0),
      buffer_is_full_(false),
      len_circular_buffer_(window_size),
      len_high_activity_(0) {}

Histogram::~Histogram() {}
|
||||
|
||||
void Histogram::Update(double rms, double activity_probaility) {
|
||||
// If circular histogram is activated then remove the oldest entry.
|
||||
if (len_circular_buffer_ > 0)
|
||||
RemoveOldestEntryAndUpdate();
|
||||
|
||||
// Find the corresponding bin.
|
||||
int hist_index = GetBinIndex(rms);
|
||||
// To Q10 domain.
|
||||
int prob_q10 = static_cast<int16_t>(floor(activity_probaility *
|
||||
kProbQDomain));
|
||||
InsertNewestEntryAndUpdate(prob_q10, hist_index);
|
||||
}
|
||||
|
||||
// Doing nothing if buffer is not full, yet.
|
||||
void Histogram::RemoveOldestEntryAndUpdate() {
|
||||
assert(len_circular_buffer_ > 0);
|
||||
// Do nothing if circular buffer is not full.
|
||||
if (!buffer_is_full_)
|
||||
return;
|
||||
|
||||
int oldest_prob = activity_probability_[buffer_index_];
|
||||
int oldest_hist_index = hist_bin_index_[buffer_index_];
|
||||
UpdateHist(-oldest_prob, oldest_hist_index);
|
||||
}
|
||||
|
||||
void Histogram::RemoveTransient() {
|
||||
// Don't expect to be here if high-activity region is longer than
|
||||
// |kTransientWidthThreshold| or there has not been any transient.
|
||||
assert(len_high_activity_ <= kTransientWidthThreshold);
|
||||
int index = (buffer_index_ > 0) ? (buffer_index_ - 1) :
|
||||
len_circular_buffer_ - 1;
|
||||
while (len_high_activity_ > 0) {
|
||||
UpdateHist(-activity_probability_[index], hist_bin_index_[index]);
|
||||
activity_probability_[index] = 0;
|
||||
index = (index > 0) ? (index - 1) : (len_circular_buffer_ - 1);
|
||||
len_high_activity_--;
|
||||
}
|
||||
}
|
||||
|
||||
void Histogram::InsertNewestEntryAndUpdate(int activity_prob_q10,
|
||||
int hist_index) {
|
||||
// Update the circular buffer if it is enabled.
|
||||
if (len_circular_buffer_ > 0) {
|
||||
// Removing transient.
|
||||
if (activity_prob_q10 <= kLowProbThresholdQ10) {
|
||||
// Lower than threshold probability, set it to zero.
|
||||
activity_prob_q10 = 0;
|
||||
// Check if this has been a transient.
|
||||
if (len_high_activity_ <= kTransientWidthThreshold)
|
||||
RemoveTransient(); // Remove this transient.
|
||||
len_high_activity_ = 0;
|
||||
} else if (len_high_activity_ <= kTransientWidthThreshold) {
|
||||
len_high_activity_++;
|
||||
}
|
||||
// Updating the circular buffer.
|
||||
activity_probability_[buffer_index_] = activity_prob_q10;
|
||||
hist_bin_index_[buffer_index_] = hist_index;
|
||||
// Increment the buffer index and check for wrap-around.
|
||||
buffer_index_++;
|
||||
if (buffer_index_ >= len_circular_buffer_) {
|
||||
buffer_index_ = 0;
|
||||
buffer_is_full_ = true;
|
||||
}
|
||||
}
|
||||
|
||||
num_updates_++;
|
||||
if (num_updates_ < 0)
|
||||
num_updates_--;
|
||||
|
||||
UpdateHist(activity_prob_q10, hist_index);
|
||||
}
|
||||
|
||||
// Adds |activity_prob_q10| (which may be negative, to undo an earlier
// insertion) to both the running total and the bin selected by |hist_index|.
void Histogram::UpdateHist(int activity_prob_q10, int hist_index) {
  audio_content_q10_ += activity_prob_q10;
  bin_count_q10_[hist_index] += activity_prob_q10;
}
|
||||
|
||||
double Histogram::AudioContent() const {
|
||||
return audio_content_q10_ / kProbQDomain;
|
||||
}
|
||||
|
||||
// Factory for a histogram built with the default constructor. The caller
// receives a raw pointer to a newly allocated object.
Histogram* Histogram::Create() {
  Histogram* const histogram = new Histogram;
  return histogram;
}
|
||||
|
||||
// Factory for a histogram built with the given |window_size|.
// Returns NULL when |window_size| is negative; otherwise a newly allocated
// object.
Histogram* Histogram::Create(int window_size) {
  if (window_size >= 0)
    return new Histogram(window_size);
  return NULL;
}
|
||||
|
||||
// Returns the histogram to its empty state. The contents of the circular
// buffers are not cleared; only the bookkeeping that refers to them is reset.
void Histogram::Reset() {
  // Empty the circular buffer.
  len_high_activity_ = 0;
  buffer_is_full_ = false;
  buffer_index_ = 0;
  // Reset the histogram, audio-content and number of updates.
  num_updates_ = 0;
  audio_content_q10_ = 0;
  memset(bin_count_q10_, 0, sizeof(bin_count_q10_));
}
|
||||
|
||||
int Histogram::GetBinIndex(double rms) {
|
||||
// First exclude overload cases.
|
||||
if (rms <= kHistBinCenters[0]) {
|
||||
return 0;
|
||||
} else if (rms >= kHistBinCenters[kHistSize - 1]) {
|
||||
return kHistSize - 1;
|
||||
} else {
|
||||
// The quantizer is uniform in log domain. Alternatively we could do binary
|
||||
// search in linear domain.
|
||||
double rms_log = log(rms);
|
||||
|
||||
int index = static_cast<int>(floor((rms_log - kLogDomainMinBinCenter) *
|
||||
kLogDomainStepSizeInverse));
|
||||
// The final decision is in linear domain.
|
||||
double b = 0.5 * (kHistBinCenters[index] + kHistBinCenters[index + 1]);
|
||||
if (rms > b) {
|
||||
return index + 1;
|
||||
}
|
||||
return index;
|
||||
}
|
||||
}
|
||||
|
||||
double Histogram::CurrentRms() const {
|
||||
double p;
|
||||
double mean_val = 0;
|
||||
if (audio_content_q10_ > 0) {
|
||||
double p_total_inverse = 1. / static_cast<double>(audio_content_q10_);
|
||||
for (int n = 0; n < kHistSize; n++) {
|
||||
p = static_cast<double>(bin_count_q10_[n]) * p_total_inverse;
|
||||
mean_val += p * kHistBinCenters[n];
|
||||
}
|
||||
} else {
|
||||
mean_val = kHistBinCenters[0];
|
||||
}
|
||||
return mean_val;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
91
webrtc/modules/audio_processing/agc/histogram.h
Normal file
91
webrtc/modules/audio_processing/agc/histogram.h
Normal file
@ -0,0 +1,91 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_HISTOGRAM_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_HISTOGRAM_H_
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// This class implements the histogram of loudness with circular buffers so that
|
||||
// the histogram tracks the last T seconds of the loudness.
|
||||
class Histogram {
|
||||
public:
|
||||
// Create a non-sliding Histogram.
|
||||
static Histogram* Create();
|
||||
|
||||
// Create a sliding Histogram, i.e. the histogram represents the last
|
||||
// |window_size| samples.
|
||||
static Histogram* Create(int window_size);
|
||||
~Histogram();
|
||||
|
||||
// Insert RMS and the corresponding activity probability.
|
||||
void Update(double rms, double activity_probability);
|
||||
|
||||
// Reset the histogram, forget the past.
|
||||
void Reset();
|
||||
|
||||
// Current loudness, which is actually the mean of histogram in loudness
|
||||
// domain.
|
||||
double CurrentRms() const;
|
||||
|
||||
// Sum of the histogram content.
|
||||
double AudioContent() const;
|
||||
|
||||
// Number of times the histogram has been updated.
|
||||
int num_updates() const { return num_updates_; }
|
||||
|
||||
private:
|
||||
Histogram();
|
||||
explicit Histogram(int window);
|
||||
|
||||
// Find the histogram bin associated with the given |rms|.
|
||||
int GetBinIndex(double rms);
|
||||
|
||||
void RemoveOldestEntryAndUpdate();
|
||||
void InsertNewestEntryAndUpdate(int activity_prob_q10, int hist_index);
|
||||
void UpdateHist(int activity_prob_q10, int hist_index);
|
||||
void RemoveTransient();
|
||||
|
||||
// Number of histogram bins.
|
||||
static const int kHistSize = 77;
|
||||
|
||||
// Number of times the histogram is updated
|
||||
int num_updates_;
|
||||
// Audio content, this should be equal to the sum of the components of
|
||||
// |bin_count_q10_|.
|
||||
int64_t audio_content_q10_;
|
||||
|
||||
// Histogram of input RMS in Q10 with |kHistSize_| bins. In each 'Update(),'
|
||||
// we increment the associated histogram-bin with the given probability. The
|
||||
// increment is implemented in Q10 to avoid rounding errors.
|
||||
int64_t bin_count_q10_[kHistSize];
|
||||
|
||||
// Circular buffer for probabilities
|
||||
scoped_ptr<int[]> activity_probability_;
|
||||
// Circular buffer for histogram-indices of probabilities.
|
||||
scoped_ptr<int[]> hist_bin_index_;
|
||||
// Current index of circular buffer, where the newest data will be written to,
|
||||
// therefore, pointing to the oldest data if buffer is full.
|
||||
int buffer_index_;
|
||||
// Indicating if buffer is full and we had a wrap around.
|
||||
int buffer_is_full_;
|
||||
// Size of circular buffer.
|
||||
int len_circular_buffer_;
|
||||
int len_high_activity_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_HISTOGRAM_H_
|
104
webrtc/modules/audio_processing/agc/histogram_unittest.cc
Normal file
104
webrtc/modules/audio_processing/agc/histogram_unittest.cc
Normal file
@ -0,0 +1,104 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
// Use CreateHistUnittestFile.m to generate the input file.
|
||||
|
||||
#include "webrtc/modules/audio_processing/agc/histogram.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <cmath>
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "webrtc/test/testsupport/fileutils.h"
|
||||
#include "webrtc/modules/audio_processing/agc/utility.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// One record of the reference file, read verbatim with fread(). The field
// order therefore has to match the layout of the file.
struct InputOutput {
  // Inputs passed to Histogram::Update(). A negative |rms| is used by the
  // test as a marker to reset the histogram.
  double rms;
  double activity_probability;
  // Expected values after the update.
  double audio_content;
  double loudness;
};
|
||||
|
||||
// Relative tolerance. RunTest() multiplies it by the smaller of the measured
// and the reference value to obtain the absolute bound given to ASSERT_NEAR.
const double kRelativeErrTol = 1e-10;
|
||||
|
||||
class HistogramTest : public ::testing::Test {
|
||||
protected:
|
||||
void RunTest(bool enable_circular_buff,
|
||||
const char* filename);
|
||||
|
||||
private:
|
||||
void TestClean();
|
||||
scoped_ptr<Histogram> hist_;
|
||||
};
|
||||
|
||||
// Verifies the state of an empty histogram: a fixed RMS value, no audio
// content and no recorded updates.
void HistogramTest::TestClean() {
  const double kEmptyHistogramRms = 7.59621091765857e-02;
  EXPECT_EQ(hist_->CurrentRms(), kEmptyHistogramRms);
  EXPECT_EQ(hist_->AudioContent(), 0);
  EXPECT_EQ(hist_->num_updates(), 0);
}
|
||||
|
||||
void HistogramTest::RunTest(bool enable_circular_buff, const char* filename) {
|
||||
FILE* in_file = fopen(filename, "rb");
|
||||
ASSERT_TRUE(in_file != NULL);
|
||||
if (enable_circular_buff) {
|
||||
int buffer_size;
|
||||
EXPECT_EQ(fread(&buffer_size, sizeof(buffer_size), 1, in_file), 1u);
|
||||
hist_.reset(Histogram::Create(buffer_size));
|
||||
} else {
|
||||
hist_.reset(Histogram::Create());
|
||||
}
|
||||
TestClean();
|
||||
|
||||
InputOutput io;
|
||||
int num_updates = 0;
|
||||
int num_reset = 0;
|
||||
while (fread(&io, sizeof(InputOutput), 1, in_file) == 1) {
|
||||
if (io.rms < 0) {
|
||||
// We have to reset.
|
||||
hist_->Reset();
|
||||
TestClean();
|
||||
num_updates = 0;
|
||||
num_reset++;
|
||||
// Read the next chunk of input.
|
||||
if (fread(&io, sizeof(InputOutput), 1, in_file) != 1)
|
||||
break;
|
||||
}
|
||||
hist_->Update(io.rms, io.activity_probability);
|
||||
num_updates++;
|
||||
EXPECT_EQ(hist_->num_updates(), num_updates);
|
||||
double audio_content = hist_->AudioContent();
|
||||
|
||||
double abs_err = std::min(audio_content, io.audio_content) *
|
||||
kRelativeErrTol;
|
||||
|
||||
ASSERT_NEAR(audio_content, io.audio_content, abs_err);
|
||||
double current_loudness = Linear2Loudness(hist_->CurrentRms());
|
||||
abs_err = std::min(fabs(current_loudness), fabs(io.loudness)) *
|
||||
kRelativeErrTol;
|
||||
ASSERT_NEAR(current_loudness, io.loudness, abs_err);
|
||||
}
|
||||
fclose(in_file);
|
||||
}
|
||||
|
||||
// Sliding histogram: the reference file carries the window size.
TEST_F(HistogramTest, ActiveCircularBuffer) {
  const std::string input_path = test::ResourcePath(
      "audio_processing/agc/agc_with_circular_buffer", "dat");
  RunTest(true, input_path.c_str());
}
|
||||
|
||||
// Non-sliding histogram: the circular buffer is not used.
TEST_F(HistogramTest, InactiveCircularBuffer) {
  const std::string input_path = test::ResourcePath(
      "audio_processing/agc/agc_no_circular_buffer", "dat");
  RunTest(false, input_path.c_str());
}
|
||||
|
||||
} // namespace webrtc
|
36
webrtc/modules/audio_processing/agc/mock_agc.h
Normal file
36
webrtc/modules/audio_processing/agc/mock_agc.h
Normal file
@ -0,0 +1,36 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MOCK_AGC_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MOCK_AGC_H_
|
||||
|
||||
#include "webrtc/modules/audio_processing/agc/agc.h"
|
||||
|
||||
#include "gmock/gmock.h"
|
||||
#include "webrtc/modules/interface/module_common_types.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class MockAgc : public Agc {
|
||||
public:
|
||||
MOCK_METHOD2(AnalyzePreproc, float(const int16_t* audio, int length));
|
||||
MOCK_METHOD3(Process, int(const int16_t* audio, int length,
|
||||
int sample_rate_hz));
|
||||
MOCK_METHOD1(GetRmsErrorDb, bool(int* error));
|
||||
MOCK_METHOD0(Reset, void());
|
||||
MOCK_METHOD1(set_target_level_dbfs, int(int level));
|
||||
MOCK_CONST_METHOD0(target_level_dbfs, int());
|
||||
MOCK_METHOD1(EnableStandaloneVad, void(bool enable));
|
||||
MOCK_CONST_METHOD0(standalone_vad_enabled, bool());
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MOCK_AGC_H_
|
77
webrtc/modules/audio_processing/agc/noise_gmm_tables.h
Normal file
77
webrtc/modules/audio_processing/agc/noise_gmm_tables.h
Normal file
@ -0,0 +1,77 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
// GMM tables for inactive segments. Generated by MakeGmmTables.m.
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_NOISE_GMM_TABLES_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_NOISE_GMM_TABLES_H_
|
||||
|
||||
static const int kNoiseGmmNumMixtures = 12;
|
||||
static const int kNoiseGmmDim = 3;
|
||||
|
||||
static const double kNoiseGmmCovarInverse[kNoiseGmmNumMixtures]
|
||||
[kNoiseGmmDim][kNoiseGmmDim] = {
|
||||
{{ 7.36219567592941e+00, 4.83060785179861e-03, 1.23335151497610e-02},
|
||||
{ 4.83060785179861e-03, 1.65289507047817e-04, -2.41490588169997e-04},
|
||||
{ 1.23335151497610e-02, -2.41490588169997e-04, 6.59472060689382e-03}},
|
||||
{{ 8.70265239309140e+00, -5.30636201431086e-04, 5.44014966585347e-03},
|
||||
{-5.30636201431086e-04, 3.11095453521008e-04, -1.86287206836035e-04},
|
||||
{ 5.44014966585347e-03, -1.86287206836035e-04, 6.29493388790744e-04}},
|
||||
{{ 4.53467851955055e+00, -3.92977536695197e-03, -2.46521420693317e-03},
|
||||
{-3.92977536695197e-03, 4.94650752632750e-05, -1.08587438501826e-05},
|
||||
{-2.46521420693317e-03, -1.08587438501826e-05, 9.28793975422261e-05}},
|
||||
{{ 9.26817997114275e-01, -4.03976069276753e-04, -3.56441427392165e-03},
|
||||
{-4.03976069276753e-04, 2.51976251631430e-06, 1.46914206734572e-07},
|
||||
{-3.56441427392165e-03, 1.46914206734572e-07, 8.19914567685373e-05}},
|
||||
{{ 7.61715986787441e+00, -1.54889041216888e-04, 2.41756280071656e-02},
|
||||
{-1.54889041216888e-04, 3.50282550461672e-07, -6.27251196972490e-06},
|
||||
{ 2.41756280071656e-02, -6.27251196972490e-06, 1.45061847649872e-02}},
|
||||
{{ 8.31193642663158e+00, -3.84070508164323e-04, -3.09750630821876e-02},
|
||||
{-3.84070508164323e-04, 3.80433432277336e-07, -1.14321142836636e-06},
|
||||
{-3.09750630821876e-02, -1.14321142836636e-06, 8.35091486289997e-04}},
|
||||
{{ 9.67283151270894e-01, 5.82465812445039e-05, -3.18350798617053e-03},
|
||||
{ 5.82465812445039e-05, 2.23762672000318e-07, -7.74196587408623e-07},
|
||||
{-3.18350798617053e-03, -7.74196587408623e-07, 3.85120938338325e-04}},
|
||||
{{ 8.28066236985388e+00, 5.87634508319763e-05, 6.99303090891743e-03},
|
||||
{ 5.87634508319763e-05, 2.93746018618058e-07, 3.40843332882272e-07},
|
||||
{ 6.99303090891743e-03, 3.40843332882272e-07, 1.99379171190344e-04}},
|
||||
{{ 6.07488998675646e+00, -1.11494526618473e-02, 5.10013111123381e-03},
|
||||
{-1.11494526618473e-02, 6.99238879921751e-04, 5.36718550370870e-05},
|
||||
{ 5.10013111123381e-03, 5.36718550370870e-05, 5.26909853276753e-04}},
|
||||
{{ 6.90492021419175e+00, 4.20639355257863e-04, -2.38612752336481e-03},
|
||||
{ 4.20639355257863e-04, 3.31246767338153e-06, -2.42052288150859e-08},
|
||||
{-2.38612752336481e-03, -2.42052288150859e-08, 4.46608368363412e-04}},
|
||||
{{ 1.31069150869715e+01, -1.73718583865670e-04, -1.97591814508578e-02},
|
||||
{-1.73718583865670e-04, 2.80451716300124e-07, 9.96570755379865e-07},
|
||||
{-1.97591814508578e-02, 9.96570755379865e-07, 2.41361900868847e-03}},
|
||||
{{ 4.69566344239814e+00, -2.61077567563690e-04, 5.26359000761433e-03},
|
||||
{-2.61077567563690e-04, 1.82420859823767e-06, -7.83645887541601e-07},
|
||||
{ 5.26359000761433e-03, -7.83645887541601e-07, 1.33586288288802e-02}}};
|
||||
|
||||
static const double kNoiseGmmMean[kNoiseGmmNumMixtures][kNoiseGmmDim] = {
|
||||
{-2.01386094766163e+00, 1.69702162045397e+02, 7.41715804872181e+01},
|
||||
{-1.94684591777290e+00, 1.42398396732668e+02, 1.64186321157831e+02},
|
||||
{-2.29319297562437e+00, 3.86415425589868e+02, 2.13452215267125e+02},
|
||||
{-3.25487177070268e+00, 1.08668712553616e+03, 2.33119949467419e+02},
|
||||
{-2.13159632447467e+00, 4.83821702557717e+03, 6.86786166673740e+01},
|
||||
{-2.26171410780526e+00, 4.79420193982422e+03, 1.53222513286450e+02},
|
||||
{-3.32166740703185e+00, 4.35161135834358e+03, 1.33206448431316e+02},
|
||||
{-2.19290322814343e+00, 3.98325506609408e+03, 2.13249167359934e+02},
|
||||
{-2.02898459255404e+00, 7.37039893155007e+03, 1.12518527491926e+02},
|
||||
{-2.26150236399500e+00, 1.54896745196145e+03, 1.49717357868579e+02},
|
||||
{-2.00417668301790e+00, 3.82434760310304e+03, 1.07438913004312e+02},
|
||||
{-2.30193040814533e+00, 1.43953696546439e+03, 7.04085275122649e+01}};
|
||||
|
||||
static const double kNoiseGmmWeights[kNoiseGmmNumMixtures] = {
|
||||
-1.09422832086193e+01, -1.10847897513425e+01, -1.36767587732187e+01,
|
||||
-1.79789356118641e+01, -1.42830169160894e+01, -1.56500228061379e+01,
|
||||
-1.83124990950113e+01, -1.69979436177477e+01, -1.12329424387828e+01,
|
||||
-1.41311785780639e+01, -1.47171861448585e+01, -1.35963362781839e+01};
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_NOISE_GMM_TABLES_H_
|
123
webrtc/modules/audio_processing/agc/pitch_based_vad.cc
Normal file
123
webrtc/modules/audio_processing/agc/pitch_based_vad.cc
Normal file
@ -0,0 +1,123 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/agc/pitch_based_vad.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "webrtc/modules/audio_processing/agc/circular_buffer.h"
|
||||
#include "webrtc/modules/audio_processing/agc/common.h"
|
||||
#include "webrtc/modules/audio_processing/agc/noise_gmm_tables.h"
|
||||
#include "webrtc/modules/audio_processing/agc/voice_gmm_tables.h"
|
||||
#include "webrtc/modules/interface/module_common_types.h"
|
||||
#include "webrtc/system_wrappers/interface/compile_assert.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
COMPILE_ASSERT(kNoiseGmmDim == kVoiceGmmDim,
|
||||
noise_and_voice_gmm_dimension_not_equal);
|
||||
|
||||
// These values should match MATLAB counterparts for unit-tests to pass.

// Capacity of the buffer of posterior probabilities.
static const int kPosteriorHistorySize = 500;  // 5 sec of 10 ms frames.
// Prior probability the VAD starts with.
static const double kInitialPriorProbability = 0.3;
// Arguments passed to the circular buffer's RemoveTransient().
static const int kTransientWidthThreshold = 7;
static const double kLowProbabilityThreshold = 0.2;
|
||||
|
||||
// Clamps |p| to the closed interval [0.01, 0.99] and returns the result.
static double LimitProbability(double p) {
  const double kLimLow = 0.01;
  const double kLimHigh = 0.99;

  if (p > kLimHigh)
    return kLimHigh;
  if (p < kLimLow)
    return kLimLow;
  return p;
}
|
||||
|
||||
// Starts from the initial prior probability, creates the buffer of posterior
// probabilities and points both GMM descriptors at their constant tables.
PitchBasedVad::PitchBasedVad()
    : p_prior_(kInitialPriorProbability),
      circular_buffer_(AgcCircularBuffer::Create(kPosteriorHistorySize)) {
  // Setup voice GMM.
  voice_gmm_.covar_inverse = &kVoiceGmmCovarInverse[0][0][0];
  voice_gmm_.mean = &kVoiceGmmMean[0][0];
  voice_gmm_.weight = kVoiceGmmWeights;
  voice_gmm_.num_mixtures = kVoiceGmmNumMixtures;
  voice_gmm_.dimension = kVoiceGmmDim;

  // Setup noise GMM.
  noise_gmm_.covar_inverse = &kNoiseGmmCovarInverse[0][0][0];
  noise_gmm_.mean = &kNoiseGmmMean[0][0];
  noise_gmm_.weight = kNoiseGmmWeights;
  noise_gmm_.num_mixtures = kNoiseGmmNumMixtures;
  noise_gmm_.dimension = kNoiseGmmDim;
}
|
||||
|
||||
// The destructor body is intentionally empty; no explicit clean-up is done.
PitchBasedVad::~PitchBasedVad() {}
|
||||
|
||||
int PitchBasedVad::VoicingProbability(const AudioFeatures& features,
|
||||
double* p_combined) {
|
||||
double p;
|
||||
double gmm_features[3];
|
||||
double pdf_features_given_voice;
|
||||
double pdf_features_given_noise;
|
||||
// These limits are the same in matlab implementation 'VoicingProbGMM().'
|
||||
const double kLimLowLogPitchGain = -2.0;
|
||||
const double kLimHighLogPitchGain = -0.9;
|
||||
const double kLimLowSpectralPeak = 200;
|
||||
const double kLimHighSpectralPeak = 2000;
|
||||
const double kEps = 1e-12;
|
||||
for (int n = 0; n < features.num_frames; n++) {
|
||||
gmm_features[0] = features.log_pitch_gain[n];
|
||||
gmm_features[1] = features.spectral_peak[n];
|
||||
gmm_features[2] = features.pitch_lag_hz[n];
|
||||
|
||||
pdf_features_given_voice = EvaluateGmm(gmm_features, voice_gmm_);
|
||||
pdf_features_given_noise = EvaluateGmm(gmm_features, noise_gmm_);
|
||||
|
||||
if (features.spectral_peak[n] < kLimLowSpectralPeak ||
|
||||
features.spectral_peak[n] > kLimHighSpectralPeak ||
|
||||
features.log_pitch_gain[n] < kLimLowLogPitchGain) {
|
||||
pdf_features_given_voice = kEps * pdf_features_given_noise;
|
||||
} else if (features.log_pitch_gain[n] > kLimHighLogPitchGain) {
|
||||
pdf_features_given_noise = kEps * pdf_features_given_voice;
|
||||
}
|
||||
|
||||
p = p_prior_ * pdf_features_given_voice / (pdf_features_given_voice *
|
||||
p_prior_ + pdf_features_given_noise * (1 - p_prior_));
|
||||
|
||||
p = LimitProbability(p);
|
||||
|
||||
// Combine pitch-based probability with standalone probability, before
|
||||
// updating prior probabilities.
|
||||
double prod_active = p * p_combined[n];
|
||||
double prod_inactive = (1 - p) * (1 - p_combined[n]);
|
||||
p_combined[n] = prod_active / (prod_active + prod_inactive);
|
||||
|
||||
if (UpdatePrior(p_combined[n]) < 0)
|
||||
return -1;
|
||||
// Limit prior probability. With a zero prior probability the posterior
|
||||
// probability is always zero.
|
||||
p_prior_ = LimitProbability(p_prior_);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int PitchBasedVad::UpdatePrior(double p) {
|
||||
circular_buffer_->Insert(p);
|
||||
if (circular_buffer_->RemoveTransient(kTransientWidthThreshold,
|
||||
kLowProbabilityThreshold) < 0)
|
||||
return -1;
|
||||
p_prior_ = circular_buffer_->Mean();
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
56
webrtc/modules/audio_processing/agc/pitch_based_vad.h
Normal file
56
webrtc/modules/audio_processing/agc/pitch_based_vad.h
Normal file
@ -0,0 +1,56 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_PITCH_BASED_VAD_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_PITCH_BASED_VAD_H_
|
||||
|
||||
#include "webrtc/modules/audio_processing/agc/common.h"
|
||||
#include "webrtc/modules/audio_processing/agc/gmm.h"
|
||||
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class AudioFrame;
|
||||
class AgcCircularBuffer;
|
||||
|
||||
// Computes the probability of the input audio frame to be active given
|
||||
// the corresponding pitch-gain and lag of the frame.
|
||||
class PitchBasedVad {
|
||||
public:
|
||||
PitchBasedVad();
|
||||
~PitchBasedVad();
|
||||
|
||||
// Compute pitch-based voicing probability, given the features.
|
||||
// features: a structure containing features required for computing voicing
|
||||
// probabilities.
|
||||
//
|
||||
// p_combined: an array which contains the combined activity probabilities
|
||||
// computed prior to the call of this function. The method,
|
||||
// then, computes the voicing probabilities and combine them
|
||||
// with the given values. The result are returned in |p|.
|
||||
int VoicingProbability(const AudioFeatures& features, double* p_combined);
|
||||
private:
|
||||
int UpdatePrior(double p);
|
||||
|
||||
// TODO(turajs): maybe defining this at a higher level (maybe enum) so that
|
||||
// all the code recognize it as "no-error."
|
||||
static const int kNoError = 0;
|
||||
|
||||
GmmParameters noise_gmm_;
|
||||
GmmParameters voice_gmm_;
|
||||
|
||||
double p_prior_;
|
||||
|
||||
scoped_ptr<AgcCircularBuffer> circular_buffer_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_PITCH_BASED_VAD_H_
|
@ -0,0 +1,71 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/agc/pitch_based_vad.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "webrtc/test/testsupport/fileutils.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Feeds recorded features frame by frame into PitchBasedVad and compares the
// resulting probability with the recorded reference, within 0.01.
TEST(PitchBasedVadTest, VoicingProbabilityTest) {
  // Opens a resource file for binary reading and closes it on every exit
  // path, including the early returns taken by failing ASSERT_* macros.
  struct ScopedFile {
    explicit ScopedFile(const std::string& path)
        : file(fopen(path.c_str(), "rb")) {}
    ~ScopedFile() {
      if (file != NULL)
        fclose(file);
    }
    FILE* file;
  };

  ScopedFile spectral_peak(test::ResourcePath(
      "audio_processing/agc/agc_spectral_peak", "dat"));
  ASSERT_TRUE(spectral_peak.file != NULL);

  ScopedFile pitch_gain(
      test::ResourcePath("audio_processing/agc/agc_pitch_gain", "dat"));
  ASSERT_TRUE(pitch_gain.file != NULL);

  ScopedFile pitch_lag(
      test::ResourcePath("audio_processing/agc/agc_pitch_lag", "dat"));
  ASSERT_TRUE(pitch_lag.file != NULL);

  ScopedFile voicing_prob(
      test::ResourcePath("audio_processing/agc/agc_voicing_prob", "dat"));
  ASSERT_TRUE(voicing_prob.file != NULL);

  PitchBasedVad vad;

  double reference_activity_probability;

  AudioFeatures audio_features;
  memset(&audio_features, 0, sizeof(audio_features));
  audio_features.num_frames = 1;
  // The spectral-peak file drives the loop; the other files must provide one
  // value for each value read from it.
  while (fread(audio_features.spectral_peak,
               sizeof(audio_features.spectral_peak[0]), 1,
               spectral_peak.file) == 1u) {
    double p;
    ASSERT_EQ(1u, fread(audio_features.log_pitch_gain,
                        sizeof(audio_features.log_pitch_gain[0]), 1,
                        pitch_gain.file));
    ASSERT_EQ(1u, fread(audio_features.pitch_lag_hz,
                        sizeof(audio_features.pitch_lag_hz[0]), 1,
                        pitch_lag.file));
    ASSERT_EQ(1u, fread(&reference_activity_probability,
                        sizeof(reference_activity_probability), 1,
                        voicing_prob.file));

    p = 0.5;  // Initialize to the neutral value for combining probabilities.
    EXPECT_EQ(0, vad.VoicingProbability(audio_features, &p));
    EXPECT_NEAR(p, reference_activity_probability, 0.01);
  }
}
|
||||
|
||||
} // namespace webrtc
|
52
webrtc/modules/audio_processing/agc/pitch_internal.cc
Normal file
52
webrtc/modules/audio_processing/agc/pitch_internal.cc
Normal file
@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/agc/pitch_internal.h"
|
||||
|
||||
#include <cmath>
|
||||
|
||||
// A 4-to-3 linear interpolation.
// The interpolation constants are derived as following:
// Input pitch parameters are updated every 7.5 ms. Within a 30-ms interval
// we are interested in pitch parameters of 0-5 ms, 10-15ms and 20-25ms. This is
// like interpolating 4-to-6 and keep the odd samples.
// The reason behind this is that LPC coefficients are computed for the first
// half of each 10ms interval.
//   old_val: value blended with in[0] to produce out[0].
//   in: four input values.
//   out: receives the three interpolated values.
static void PitchInterpolation(double old_val, const double* in, double* out) {
  const double kOneSixth = 1. / 6.;
  const double kFiveSixths = 5. / 6.;

  out[0] = kOneSixth * old_val + kFiveSixths * in[0];
  out[1] = kFiveSixths * in[1] + kOneSixth * in[2];
  out[2] = 0.5 * in[2] + 0.5 * in[3];
}
|
||||
|
||||
|
||||
// Derives per-subframe pitch parameters from one block of input parameters.
//   sampling_rate_hz: sampling rate used to convert lags to Hertz.
//   gains: |num_in_frames| pitch gains; overwritten in place with
//     log(gain + 1e-12).
//   lags: |num_in_frames| pitch lags.
//   log_old_gain, old_lag: in/out state. On entry the values blended into the
//     first output; on return the last (log) gain and lag of this block.
//   log_pitch_gain: receives the interpolated log gains.
//   pitch_lag_hz: receives the interpolated lags converted to Hertz, for the
//     first |num_out_frames| entries.
void GetSubframesPitchParameters(int sampling_rate_hz,
                                 double* gains,
                                 double* lags,
                                 int num_in_frames,
                                 int num_out_frames,
                                 double* log_old_gain,
                                 double* old_lag,
                                 double* log_pitch_gain,
                                 double* pitch_lag_hz) {
  // Gain interpolation is in log-domain, also returned in log-domain.
  for (int i = 0; i < num_in_frames; i++)
    gains[i] = log(gains[i] + 1e-12);

  const int last_in = num_in_frames - 1;

  // Interpolate lags and gains.
  PitchInterpolation(*log_old_gain, gains, log_pitch_gain);
  *log_old_gain = gains[last_in];
  PitchInterpolation(*old_lag, lags, pitch_lag_hz);
  *old_lag = lags[last_in];

  // Convert pitch-lags to Hertz.
  for (int i = 0; i < num_out_frames; i++)
    pitch_lag_hz[i] = (sampling_rate_hz) / (pitch_lag_hz[i]);
}
|
26
webrtc/modules/audio_processing/agc/pitch_internal.h
Normal file
26
webrtc/modules/audio_processing/agc/pitch_internal.h
Normal file
@ -0,0 +1,26 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_PITCH_INTERNAL_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_PITCH_INTERNAL_H_
|
||||
|
||||
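// Converts the pitch gains and lags of one input block into per-subframe
// values: |gains| is replaced in place by log(gain + 1e-12), gains and lags
// are interpolated 4-to-3 into |log_pitch_gain| and |pitch_lag_hz|, and the
// interpolated lags are then converted to Hertz as |sampling_rate_hz| / lag.
// |log_old_gain| and |old_lag| are updated to the last input (log) gain and
// lag so that they can be passed to the next call.
// TODO(turajs): Be consistent with usage of |sampling_rate_hz| vs
// |kSamplingFreqHz|.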
|
||||
void GetSubframesPitchParameters(int sampling_rate_hz,
|
||||
double* gains,
|
||||
double* lags,
|
||||
int num_in_frames,
|
||||
int num_out_frames,
|
||||
double* log_old_gain,
|
||||
double* old_lag,
|
||||
double* log_pitch_gain,
|
||||
double* pitch_lag_hz);
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_PITCH_INTERNAL_H_
|
@ -0,0 +1,50 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/agc/pitch_internal.h"
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
// Checks GetSubframesPitchParameters() against precomputed outputs for one
// block of four input gains/lags, including the updated "old" state values.
TEST(PitchInternalTest, test) {
  const int kSamplingRateHz = 8000;
  const int kNumInputParameters = 4;
  const int kNumOutputParameters = 3;
  // Inputs
  double log_old_gain = log(0.5);
  double gains[] = {0.6, 0.2, 0.5, 0.4};

  double old_lag = 70;
  double lags[] = {90, 111, 122, 50};

  // Expected outputs
  double expected_log_pitch_gain[] = {-0.541212549898316, -1.45672279045507,
                                      -0.80471895621705};
  // Computed before the call, while |gains| still holds linear values.
  double expected_log_old_gain = log(gains[kNumInputParameters - 1]);

  double expected_pitch_lag_hz[] = {92.3076923076923, 70.9010339734121,
                                    93.0232558139535};
  double expected_old_lag = lags[kNumInputParameters - 1];

  // Both outputs hold one value per output subframe.
  double log_pitch_gain[kNumOutputParameters];
  double pitch_lag_hz[kNumOutputParameters];

  GetSubframesPitchParameters(kSamplingRateHz, gains, lags, kNumInputParameters,
                              kNumOutputParameters, &log_old_gain, &old_lag,
                              log_pitch_gain, pitch_lag_hz);

  for (int n = 0; n < kNumOutputParameters; n++) {
    EXPECT_NEAR(pitch_lag_hz[n], expected_pitch_lag_hz[n], 1e-6);
    EXPECT_NEAR(log_pitch_gain[n], expected_log_pitch_gain[n], 1e-8);
  }
  EXPECT_NEAR(old_lag, expected_old_lag, 1e-6);
  EXPECT_NEAR(log_old_gain, expected_log_old_gain, 1e-8);
}
|
111
webrtc/modules/audio_processing/agc/pole_zero_filter.cc
Normal file
111
webrtc/modules/audio_processing/agc/pole_zero_filter.cc
Normal file
@ -0,0 +1,111 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/agc/pole_zero_filter.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <algorithm>
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Validates the arguments and returns a newly allocated filter, or NULL when
// they are rejected. The returned object is created with |new|.
//
// Rejected inputs: a NULL coefficient array, an order that is negative or
// larger than kMaxFilterOrder, or a leading denominator coefficient of zero
// (the constructor divides by it to normalize the filter).
PoleZeroFilter* PoleZeroFilter::Create(const float* numerator_coefficients,
                                       int order_numerator,
                                       const float* denominator_coefficients,
                                       int order_denominator) {
  // The pointers must be validated before any coefficient is read; checking
  // |denominator_coefficients[0]| first would dereference a NULL argument.
  if (numerator_coefficients == NULL || denominator_coefficients == NULL)
    return NULL;
  if (order_numerator < 0 || order_numerator > kMaxFilterOrder ||
      order_denominator < 0 || order_denominator > kMaxFilterOrder)
    return NULL;
  if (denominator_coefficients[0] == 0)
    return NULL;
  return new PoleZeroFilter(numerator_coefficients, order_numerator,
                            denominator_coefficients, order_denominator);
}
|
||||
|
||||
// Copies the coefficient arrays (|order| + 1 entries each), zero-initializes
// the history buffers and normalizes the filter so that the leading
// denominator coefficient becomes 1.
PoleZeroFilter::PoleZeroFilter(const float* numerator_coefficients,
                               int order_numerator,
                               const float* denominator_coefficients,
                               int order_denominator)
    : past_input_(),
      past_output_(),
      numerator_coefficients_(),
      denominator_coefficients_(),
      order_numerator_(order_numerator),
      order_denominator_(order_denominator),
      highest_order_(std::max(order_denominator, order_numerator)) {
  memcpy(numerator_coefficients_, numerator_coefficients,
         sizeof(numerator_coefficients_[0]) * (order_numerator_ + 1));
  memcpy(denominator_coefficients_, denominator_coefficients,
         sizeof(denominator_coefficients_[0]) * (order_denominator_ + 1));

  // The divisor has to be captured before the loops run: dividing by
  // |denominator_coefficients_[0]| in place turns that element into 1 on the
  // first iteration, which would leave every later denominator coefficient
  // unscaled.
  const float leading_coefficient = denominator_coefficients_[0];
  if (leading_coefficient != 1) {
    for (int n = 0; n <= order_numerator_; n++)
      numerator_coefficients_[n] /= leading_coefficient;
    for (int n = 0; n <= order_denominator_; n++)
      denominator_coefficients_[n] /= leading_coefficient;
  }
}
|
||||
|
||||
// Returns sum over k = 1..order of coefficients[k] * past[order - k], i.e. the
// contribution of the |order| samples preceding the current one. |past| points
// at the oldest of those samples, so past[order - 1] is the most recent.
// coefficients[0] is not used here.
template <typename T>
static float FilterArPast(const T* past, int order,
                          const float* coefficients) {
  float accumulated = 0.0f;
  for (int tap = 1; tap <= order; ++tap) {
    accumulated += coefficients[tap] * past[order - tap];
  }
  return accumulated;
}
|
||||
|
||||
int PoleZeroFilter::Filter(const int16_t* in,
|
||||
int num_input_samples,
|
||||
float* output) {
|
||||
if (in == NULL || num_input_samples < 0 || output == NULL)
|
||||
return -1;
|
||||
// This is the typical case, just a memcpy.
|
||||
const int k = std::min(num_input_samples, highest_order_);
|
||||
int n;
|
||||
for (n = 0; n < k; n++) {
|
||||
output[n] = in[n] * numerator_coefficients_[0];
|
||||
output[n] += FilterArPast(&past_input_[n], order_numerator_,
|
||||
numerator_coefficients_);
|
||||
output[n] -= FilterArPast(&past_output_[n], order_denominator_,
|
||||
denominator_coefficients_);
|
||||
|
||||
past_input_[n + order_numerator_] = in[n];
|
||||
past_output_[n + order_denominator_] = output[n];
|
||||
}
|
||||
if (highest_order_ < num_input_samples) {
|
||||
for (int m = 0; n < num_input_samples; n++, m++) {
|
||||
output[n] = in[n] * numerator_coefficients_[0];
|
||||
output[n] += FilterArPast(&in[m], order_numerator_,
|
||||
numerator_coefficients_);
|
||||
output[n] -= FilterArPast(&output[m], order_denominator_,
|
||||
denominator_coefficients_);
|
||||
}
|
||||
// Record into the past signal.
|
||||
memcpy(past_input_, &in[num_input_samples - order_numerator_],
|
||||
sizeof(in[0]) * order_numerator_);
|
||||
memcpy(past_output_, &output[num_input_samples - order_denominator_],
|
||||
sizeof(output[0]) * order_denominator_);
|
||||
} else {
|
||||
// Odd case that the length of the input is shorter that filter order.
|
||||
memmove(past_input_, &past_input_[num_input_samples], order_numerator_ *
|
||||
sizeof(past_input_[0]));
|
||||
memmove(past_output_, &past_output_[num_input_samples], order_denominator_ *
|
||||
sizeof(past_output_[0]));
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
50
webrtc/modules/audio_processing/agc/pole_zero_filter.h
Normal file
50
webrtc/modules/audio_processing/agc/pole_zero_filter.h
Normal file
@ -0,0 +1,50 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_POLE_ZERO_FILTER_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_POLE_ZERO_FILTER_H_
|
||||
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Pole-zero filter that consumes 16-bit samples and produces float samples.
// Instances are obtained through Create(); the constructor is private.
class PoleZeroFilter {
 public:
  ~PoleZeroFilter() {}

  // Builds a filter from numerator/denominator coefficient arrays holding
  // |order| + 1 entries each. Returns NULL if the arguments are rejected.
  static PoleZeroFilter* Create(const float* numerator_coefficients,
                                int order_numerator,
                                const float* denominator_coefficients,
                                int order_denominator);

  // Filters |num_input_samples| samples of |in| into |output|. Returns 0 on
  // success and -1 on invalid arguments.
  int Filter(const int16_t* in, int num_input_samples, float* output);

 private:
  PoleZeroFilter(const float* numerator_coefficients,
                 int order_numerator,
                 const float* denominator_coefficients,
                 int order_denominator);

  // Largest supported order for either polynomial.
  static const int kMaxFilterOrder = 24;

  // Input/output history carried over between calls to Filter().
  int16_t past_input_[kMaxFilterOrder * 2];
  float past_output_[kMaxFilterOrder * 2];

  // Local copies of the filter coefficients.
  float numerator_coefficients_[kMaxFilterOrder + 1];
  float denominator_coefficients_[kMaxFilterOrder + 1];

  int order_numerator_;
  int order_denominator_;
  int highest_order_;  // max(order_numerator_, order_denominator_).
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_POLE_ZERO_FILTER_H_
|
@ -0,0 +1,98 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/agc/pole_zero_filter.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "webrtc/modules/audio_processing/agc/agc_audio_proc_internal.h"
|
||||
#include "webrtc/system_wrappers/interface/compile_assert.h"
|
||||
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
|
||||
#include "webrtc/test/testsupport/fileutils.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
static const int kInputSamples = 50;
|
||||
|
||||
static const int16_t kInput[kInputSamples] = {-2136, -7116, 10715, 2464, 3164,
|
||||
8139, 11393, 24013, -32117, -5544, -27740, 10181, 14190, -24055, -15912,
|
||||
17393, 6359, -9950, -13894, 32432, -23944, 3437, -8381, 19768, 3087, -19795,
|
||||
-5920, 13310, 1407, 3876, 4059, 3524, -23130, 19121, -27900, -24840, 4089,
|
||||
21422, -3625, 3015, -11236, 28856, 13424, 6571, -19761, -6361, 15821, -9469,
|
||||
29727, 32229};
|
||||
|
||||
static const float kReferenceOutput[kInputSamples] = {-2082.230472f,
|
||||
-6878.572941f, 10697.090871f, 2358.373952f, 2973.936512f, 7738.580650f,
|
||||
10690.803213f, 22687.091576f, -32676.684717f, -5879.621684f, -27359.297432f,
|
||||
10368.735888f, 13994.584604f, -23676.126249f, -15078.250390f, 17818.253338f,
|
||||
6577.743123f, -9498.369315f, -13073.651079f, 32460.026588f, -23391.849347f,
|
||||
3953.805667f, -7667.761363f, 19995.153447f, 3185.575477f, -19207.365160f,
|
||||
-5143.103201f, 13756.317237f, 1779.654794f, 4142.269755f, 4209.475034f,
|
||||
3572.991789f, -22509.089546f, 19307.878964f, -27060.439759f, -23319.042810f,
|
||||
5547.685267f, 22312.718676f, -2707.309027f, 3852.358490f, -10135.510093f,
|
||||
29241.509970f, 13394.397233f, 6340.721417f, -19510.207905f, -5908.442086f,
|
||||
15882.301634f, -9211.335255f, 29253.056735f, 30874.443046f};
|
||||
|
||||
class PoleZeroFilterTest : public ::testing::Test {
|
||||
protected:
|
||||
PoleZeroFilterTest()
|
||||
: my_filter_(PoleZeroFilter::Create(
|
||||
kCoeffNumerator, kFilterOrder, kCoeffDenominator, kFilterOrder)) {}
|
||||
|
||||
~PoleZeroFilterTest() {}
|
||||
|
||||
void FilterSubframes(int num_subframes);
|
||||
|
||||
private:
|
||||
void TestClean();
|
||||
scoped_ptr<PoleZeroFilter> my_filter_;
|
||||
};
|
||||
|
||||
void PoleZeroFilterTest::FilterSubframes(int num_subframes) {
|
||||
float output[kInputSamples];
|
||||
const int num_subframe_samples = kInputSamples / num_subframes;
|
||||
EXPECT_EQ(num_subframe_samples * num_subframes, kInputSamples);
|
||||
|
||||
for (int n = 0; n < num_subframes; n++) {
|
||||
my_filter_->Filter(&kInput[n * num_subframe_samples], num_subframe_samples,
|
||||
&output[n * num_subframe_samples]);
|
||||
}
|
||||
for (int n = 0; n < kInputSamples; n++) {
|
||||
EXPECT_NEAR(output[n], kReferenceOutput[n], 1);
|
||||
}
|
||||
}
|
||||
|
||||
// The same 50-sample input is filtered in 1, 2, 5, 10, 25 and 50 chunks; each
// chunk count divides the input length evenly.
TEST_F(PoleZeroFilterTest, OneSubframe) { FilterSubframes(1); }

TEST_F(PoleZeroFilterTest, TwoSubframes) { FilterSubframes(2); }

TEST_F(PoleZeroFilterTest, FiveSubframes) { FilterSubframes(5); }

TEST_F(PoleZeroFilterTest, TenSubframes) { FilterSubframes(10); }

TEST_F(PoleZeroFilterTest, TwentyFiveSubframes) { FilterSubframes(25); }

TEST_F(PoleZeroFilterTest, FiftySubframes) { FilterSubframes(50); }
|
||||
|
||||
} // namespace webrtc
|
96
webrtc/modules/audio_processing/agc/standalone_vad.cc
Normal file
96
webrtc/modules/audio_processing/agc/standalone_vad.cc
Normal file
@ -0,0 +1,96 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/agc/standalone_vad.h"
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "webrtc/modules/interface/module_common_types.h"
|
||||
#include "webrtc/modules/utility/interface/audio_frame_operations.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
static const int kDefaultStandaloneVadMode = 3;
|
||||
|
||||
// Stores |vad| (released with WebRtcVad_Free() in the destructor), starts with
// a zeroed, empty buffer and records the default mode.
StandaloneVad::StandaloneVad(VadInst* vad)
    : vad_(vad), buffer_(), index_(0), mode_(kDefaultStandaloneVadMode) {
}
|
||||
|
||||
// Releases the VAD instance handed to the constructor.
StandaloneVad::~StandaloneVad() { WebRtcVad_Free(vad_); }
|
||||
|
||||
// Creates and initializes a VAD instance in the default mode and wraps it in
// a new StandaloneVad. Returns NULL (after freeing the instance, if one was
// created) when any of the VAD calls reports failure.
StandaloneVad* StandaloneVad::Create() {
  VadInst* handle = NULL;
  if (WebRtcVad_Create(&handle) < 0) {
    return NULL;
  }

  // Both calls are always made; their status codes are OR-ed together so a
  // failure in either one is detected.
  const int init_status = WebRtcVad_Init(handle);
  const int mode_status =
      WebRtcVad_set_mode(handle, kDefaultStandaloneVadMode);
  if ((init_status | mode_status) != 0) {
    WebRtcVad_Free(handle);
    return NULL;
  }
  return new StandaloneVad(handle);
}
|
||||
|
||||
int StandaloneVad::AddAudio(const int16_t* data, int length) {
|
||||
if (length != kLength10Ms)
|
||||
return -1;
|
||||
|
||||
if (index_ + length > kLength10Ms * kMaxNum10msFrames)
|
||||
// Reset the buffer if it's full.
|
||||
// TODO(ajm): Instead, consider just processing every 10 ms frame. Then we
|
||||
// can forgo the buffering.
|
||||
index_ = 0;
|
||||
|
||||
memcpy(&buffer_[index_], data, sizeof(int16_t) * length);
|
||||
index_ += length;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int StandaloneVad::GetActivity(double* p, int length_p) {
|
||||
if (index_ == 0)
|
||||
return -1;
|
||||
|
||||
const int num_frames = index_ / kLength10Ms;
|
||||
if (num_frames > length_p)
|
||||
return -1;
|
||||
assert(WebRtcVad_ValidRateAndFrameLength(kSampleRateHz, index_) == 0);
|
||||
|
||||
int activity = WebRtcVad_Process(vad_, kSampleRateHz, buffer_, index_);
|
||||
if (activity < 0)
|
||||
return -1;
|
||||
else if (activity == 0)
|
||||
p[0] = 0.01; // Arbitrary but small and non-zero.
|
||||
else
|
||||
p[0] = 0.5; // 0.5 is neutral values when combinned by other probabilities.
|
||||
for (int n = 1; n < num_frames; n++)
|
||||
p[n] = p[0];
|
||||
// Reset the buffer to start from the beginning.
|
||||
index_ = 0;
|
||||
return activity;
|
||||
}
|
||||
|
||||
int StandaloneVad::set_mode(int mode) {
|
||||
if (mode < 0 || mode > 3)
|
||||
return -1;
|
||||
if (WebRtcVad_set_mode(vad_, mode) != 0)
|
||||
return -1;
|
||||
|
||||
mode_ = mode;
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
70
webrtc/modules/audio_processing/agc/standalone_vad.h
Normal file
70
webrtc/modules/audio_processing/agc/standalone_vad.h
Normal file
@ -0,0 +1,70 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_STANDALONE_VAD_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_STANDALONE_VAD_H_
|
||||
|
||||
#include "webrtc/common_audio/vad/include/webrtc_vad.h"
|
||||
#include "webrtc/modules/audio_processing/agc/common.h"
|
||||
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class AudioFrame;
|
||||
|
||||
class StandaloneVad {
|
||||
public:
|
||||
static StandaloneVad* Create();
|
||||
~StandaloneVad();
|
||||
|
||||
// Outputs
|
||||
// p: a buffer where probabilities are written to.
|
||||
// length_p: number of elements of |p|.
|
||||
//
|
||||
// return value:
|
||||
// -1: if no audio is stored or VAD returns error.
|
||||
// 0: in success.
|
||||
// In case of error the content of |activity| is unchanged.
|
||||
//
|
||||
// Note that due to a high false-positive (VAD decision is active while the
|
||||
// processed audio is just background noise) rate, stand-alone VAD is used as
|
||||
// a one-sided indicator. The activity probability is 0.5 if the frame is
|
||||
// classified as active, and the probability is 0.01 if the audio is
|
||||
// classified as passive. In this way, when probabilities are combined, the
|
||||
// effect of the stand-alone VAD is neutral if the input is classified as
|
||||
// active.
|
||||
int GetActivity(double* p, int length_p);
|
||||
|
||||
// Expecting 10 ms of 16 kHz audio to be pushed in.
|
||||
int AddAudio(const int16_t* data, int length);
|
||||
|
||||
// Set aggressiveness of VAD, 0 is the least aggressive and 3 is the most
|
||||
// aggressive mode. Returns -1 if the input is less than 0 or larger than 3,
|
||||
// otherwise 0 is returned.
|
||||
int set_mode(int mode);
|
||||
// Get the agressiveness of the current VAD.
|
||||
int mode() const { return mode_; }
|
||||
|
||||
private:
|
||||
explicit StandaloneVad(VadInst* vad);
|
||||
|
||||
static const int kMaxNum10msFrames = 3;
|
||||
|
||||
// TODO(turajs): Is there a way to use scoped-pointer here?
|
||||
VadInst* vad_;
|
||||
int16_t buffer_[kMaxNum10msFrames * kLength10Ms];
|
||||
int index_;
|
||||
int mode_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_STANDALONE_VAD_H_
|
103
webrtc/modules/audio_processing/agc/standalone_vad_unittest.cc
Normal file
103
webrtc/modules/audio_processing/agc/standalone_vad_unittest.cc
Normal file
@ -0,0 +1,103 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/agc/standalone_vad.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "webrtc/modules/interface/module_common_types.h"
|
||||
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
|
||||
#include "webrtc/test/testsupport/fileutils.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Exercises argument validation and buffer bookkeeping of StandaloneVad using
// all-zero audio.
TEST(StandaloneVadTest, Api) {
  scoped_ptr<StandaloneVad> vad(StandaloneVad::Create());
  int16_t data[kLength10Ms] = { 0 };

  // Valid frame length (for 32 kHz rate), but not what the VAD is expecting.
  EXPECT_EQ(-1, vad->AddAudio(data, 320));

  const int kMaxNumFrames = 3;
  double p[kMaxNumFrames];
  for (int frame = 0; frame < kMaxNumFrames; ++frame) {
    EXPECT_EQ(0, vad->AddAudio(data, kLength10Ms));
  }

  // Pretend |p| is shorter than it should be.
  EXPECT_EQ(-1, vad->GetActivity(p, kMaxNumFrames - 1));

  EXPECT_EQ(0, vad->GetActivity(p, kMaxNumFrames));

  // Ask for activity when buffer is empty.
  EXPECT_EQ(-1, vad->GetActivity(p, kMaxNumFrames));

  // Should reset and result in one buffer.
  for (int frame = 0; frame < kMaxNumFrames + 1; ++frame) {
    EXPECT_EQ(0, vad->AddAudio(data, kLength10Ms));
  }
  EXPECT_EQ(0, vad->GetActivity(p, 1));

  // Wrong modes.
  EXPECT_EQ(-1, vad->set_mode(-1));
  EXPECT_EQ(-1, vad->set_mode(4));

  // Valid mode.
  const int kMode = 2;
  EXPECT_EQ(0, vad->set_mode(kMode));
  EXPECT_EQ(kMode, vad->mode());
}
|
||||
|
||||
// Streams a PCM resource through the VAD in groups of three 10 ms frames and
// compares each decision (and the derived probabilities) with a reference
// file holding one int per group.
TEST(StandaloneVadTest, ActivityDetection) {
  scoped_ptr<StandaloneVad> vad(StandaloneVad::Create());
  // Create() returns NULL on failure; stop before dereferencing it.
  ASSERT_TRUE(vad.get() != NULL);
  const size_t kDataLength = kLength10Ms;
  int16_t data[kDataLength] = { 0 };

  FILE* pcm_file =
      fopen(test::ResourcePath("audio_processing/agc/agc_audio", "pcm").c_str(),
            "rb");
  ASSERT_TRUE(pcm_file != NULL);

  FILE* reference_file = fopen(
      test::ResourcePath("audio_processing/agc/agc_vad", "dat").c_str(), "rb");
  // The fatal assertion below returns immediately, so release the file that
  // is already open first.
  if (reference_file == NULL)
    fclose(pcm_file);
  ASSERT_TRUE(reference_file != NULL);

  // Reference activities are prepared with 0 aggressiveness.
  ASSERT_EQ(0, vad->set_mode(0));

  // Stand-alone VAD can operate on 1, 2 or 3 frames of length 10 ms. The
  // reference file is created for 30 ms frame.
  const int kNumVadFramesToProcess = 3;
  int num_frames = 0;
  while (fread(data, sizeof(int16_t), kDataLength, pcm_file) == kDataLength) {
    EXPECT_EQ(0, vad->AddAudio(data, kDataLength));
    num_frames++;
    if (num_frames == kNumVadFramesToProcess) {
      num_frames = 0;
      // Initialized so that a short read produces a deterministic mismatch
      // instead of comparing an indeterminate value.
      int reference_activity = -1;
      double p[kNumVadFramesToProcess];
      EXPECT_EQ(1u, fread(&reference_activity, sizeof(reference_activity), 1,
                          reference_file));
      int activity = vad->GetActivity(p, kNumVadFramesToProcess);
      EXPECT_EQ(reference_activity, activity);
      if (activity != 0) {
        // When active, probabilities are set to 0.5.
        for (int n = 0; n < kNumVadFramesToProcess; n++)
          EXPECT_EQ(0.5, p[n]);
      } else {
        // When inactive, probabilities are set to 0.01.
        for (int n = 0; n < kNumVadFramesToProcess; n++)
          EXPECT_EQ(0.01, p[n]);
      }
    }
  }
  fclose(reference_file);
  fclose(pcm_file);
}
|
||||
}
|
46
webrtc/modules/audio_processing/agc/test/fake_agc.h
Normal file
46
webrtc/modules/audio_processing/agc/test/fake_agc.h
Normal file
@ -0,0 +1,46 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_TEST_FAKE_AGC_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_TEST_FAKE_AGC_H_
|
||||
|
||||
#include "webrtc/modules/audio_processing/agc/agc.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class FakeAgc : public Agc {
|
||||
public:
|
||||
FakeAgc()
|
||||
: counter_(0),
|
||||
volume_(kMaxVolume / 2) {
|
||||
}
|
||||
|
||||
virtual int Process(const AudioFrame& audio_frame) {
|
||||
const int kUpdateIntervalFrames = 10;
|
||||
const int kMaxVolume = 255;
|
||||
if (counter_ % kUpdateIntervalFrames == 0) {
|
||||
volume_ = (++volume_) % kMaxVolume;
|
||||
}
|
||||
counter_++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
virtual int FakeAgc::MicVolume() {
|
||||
return volume_;
|
||||
}
|
||||
|
||||
private:
|
||||
int counter_;
|
||||
int volume_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_TEST_FAKE_AGC_H_
|
63
webrtc/modules/audio_processing/agc/test/test_utils.cc
Normal file
63
webrtc/modules/audio_processing/agc/test/test_utils.cc
Normal file
@ -0,0 +1,63 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/agc/test/test_utils.h"
|
||||
|
||||
#include <cmath>
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "webrtc/modules/interface/module_common_types.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Maps a mic level to a gain in dB, linearly: level 127 gives 0 dB and each
// step of 128 levels changes the gain by |gain_range_db| / 2.
float MicLevel2Gain(int gain_range_db, int level) {
  const float normalized_level = (level - 127.0f) / 128.0f;
  return normalized_level * gain_range_db / 2;
}
|
||||
|
||||
// Converts a gain in dB to a linear amplitude factor: 10^(db / 20).
float Db2Linear(float db) {
  const float exponent = db / 20.0f;
  return powf(10.0f, exponent);
}
|
||||
|
||||
void ApplyGainLinear(float gain, float last_gain, AudioFrame* frame) {
|
||||
const int frame_length = frame->samples_per_channel_ * frame->num_channels_;
|
||||
// Smooth the transition between gain levels across the frame.
|
||||
float smoothed_gain = last_gain;
|
||||
float gain_step = (gain - last_gain) / (frame_length - 1);
|
||||
for (int i = 0; i < frame_length; ++i) {
|
||||
smoothed_gain += gain_step;
|
||||
float sample = std::floor(frame->data_[i] * smoothed_gain + 0.5);
|
||||
sample = std::max(std::min(32767.0f, sample), -32768.0f);
|
||||
frame->data_[i] = static_cast<int16_t>(sample);
|
||||
}
|
||||
}
|
||||
|
||||
// Same as ApplyGainLinear(), with both gains given in dB.
void ApplyGain(float gain_db, float last_gain_db, AudioFrame* frame) {
  const float target_gain = Db2Linear(gain_db);
  const float previous_gain = Db2Linear(last_gain_db);
  ApplyGainLinear(target_gain, previous_gain, frame);
}
|
||||
|
||||
void SimulateMic(int gain_range_db, int mic_level, int last_mic_level,
|
||||
AudioFrame* frame) {
|
||||
assert(mic_level >= 0 && mic_level <= 255);
|
||||
assert(last_mic_level >= 0 && last_mic_level <= 255);
|
||||
ApplyGain(MicLevel2Gain(gain_range_db, mic_level),
|
||||
MicLevel2Gain(gain_range_db, last_mic_level),
|
||||
frame);
|
||||
}
|
||||
|
||||
void SimulateMic(int gain_map[255], int mic_level, int last_mic_level,
|
||||
AudioFrame* frame) {
|
||||
assert(mic_level >= 0 && mic_level <= 255);
|
||||
assert(last_mic_level >= 0 && last_mic_level <= 255);
|
||||
ApplyGain(gain_map[mic_level], gain_map[last_mic_level], frame);
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
28
webrtc/modules/audio_processing/agc/test/test_utils.h
Normal file
28
webrtc/modules/audio_processing/agc/test/test_utils.h
Normal file
@ -0,0 +1,28 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
// The guard name follows this header's path (agc/test/test_utils.h), like the
// other headers in this directory, so it cannot clash with a test_utils.h
// located directly under audio_processing/test/.
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_TEST_TEST_UTILS_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_TEST_TEST_UTILS_H_

namespace webrtc {

class AudioFrame;

// Maps a mic level to a gain in dB; level 127 corresponds to 0 dB.
float MicLevel2Gain(int gain_range_db, int level);

// Converts a gain in dB to a linear factor, 10^(db / 20).
float Db2Linear(float db);

// Scales |frame| in place, ramping the linear gain from |last_gain| towards
// |gain| across the frame; samples are saturated to 16 bits.
void ApplyGainLinear(float gain, float last_gain, AudioFrame* frame);

// As ApplyGainLinear(), with gains given in dB.
void ApplyGain(float gain_db, float last_gain_db, AudioFrame* frame);

// Applies the gain change caused by moving the mic level from
// |last_mic_level| to |mic_level|, using MicLevel2Gain().
void SimulateMic(int gain_range_db, int mic_level, int last_mic_level,
                 AudioFrame* frame);

// As above, but the dB gain for each level is looked up in |gain_map|.
void SimulateMic(int gain_map[255], int mic_level, int last_mic_level,
                 AudioFrame* frame);

}  // namespace webrtc

#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_TEST_TEST_UTILS_H_
|
35
webrtc/modules/audio_processing/agc/utility.cc
Normal file
35
webrtc/modules/audio_processing/agc/utility.cc
Normal file
@ -0,0 +1,35 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/agc/utility.h"
|
||||
|
||||
#include <math.h>
|
||||
|
||||
// Approximation of ln(10); turns natural-log results into log10-based scales.
static const double kLog10 = 2.30258509299;
// Multiplier for ln(x) that yields 20 * log10(x).
static const double kLinear2DbScale = 20.0 / kLog10;
// Multiplier for ln(x) that yields 13.4 * log10(x).
static const double kLinear2LoudnessScale = 13.4 / kLog10;

// Converts a loudness value to dB by rescaling with the ratio of the two
// scale factors above.
double Loudness2Db(double loudness) {
  const double scaled = loudness * kLinear2DbScale;
  return scaled / kLinear2LoudnessScale;
}

// Converts a linear RMS value to loudness. An RMS of exactly zero maps to the
// fixed value -15 instead of evaluating log(0).
double Linear2Loudness(double rms) {
  return (rms == 0) ? -15 : kLinear2LoudnessScale * log(rms);
}

// Inverse of Loudness2Db().
double Db2Loudness(double db) {
  const double scaled = db * kLinear2LoudnessScale;
  return scaled / kLinear2DbScale;
}

// Converts a dBFS level to loudness after shifting it by 90 dB.
double Dbfs2Loudness(double dbfs) {
  const double shifted_db = 90 + dbfs;
  return Db2Loudness(shifted_db);
}
|
23
webrtc/modules/audio_processing/agc/utility.h
Normal file
23
webrtc/modules/audio_processing/agc/utility.h
Normal file
@ -0,0 +1,23 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_
|
||||
|
||||
// TODO(turajs): Add description of function.

// Converts a loudness value to dB.
double Loudness2Db(double loudness);

// Converts a linear RMS value to loudness (13.4 * log10(rms)); an RMS of zero
// maps to -15.
double Linear2Loudness(double rms);

// Converts dB to loudness; inverse of Loudness2Db().
double Db2Loudness(double db);

// Converts dBFS to loudness, treating 0 dBFS as 90 dB.
double Dbfs2Loudness(double dbfs);
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_
|
77
webrtc/modules/audio_processing/agc/voice_gmm_tables.h
Normal file
77
webrtc/modules/audio_processing/agc/voice_gmm_tables.h
Normal file
@ -0,0 +1,77 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
// GMM tables for active segments. Generated by MakeGmmTables.m.
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_VOICE_GMM_TABLES_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_VOICE_GMM_TABLES_H_
|
||||
|
||||
static const int kVoiceGmmNumMixtures = 12;
|
||||
static const int kVoiceGmmDim = 3;
|
||||
|
||||
static const double kVoiceGmmCovarInverse[kVoiceGmmNumMixtures]
|
||||
[kVoiceGmmDim][kVoiceGmmDim] = {
|
||||
{{ 1.83673825579513e+00, -8.09791637570095e-04, 4.60106414365986e-03},
|
||||
{-8.09791637570095e-04, 8.89351738394608e-04, -9.80188953277734e-04},
|
||||
{ 4.60106414365986e-03, -9.80188953277734e-04, 1.38706060206582e-03}},
|
||||
{{ 6.76228912850703e+01, -1.98893120119660e-02, -3.53548357253551e-03},
|
||||
{-1.98893120119660e-02, 3.96216858500530e-05, -4.08492938394097e-05},
|
||||
{-3.53548357253551e-03, -4.08492938394097e-05, 9.31864352856416e-04}},
|
||||
{{ 9.98612435944558e+00, -5.27880954316893e-03, -6.30342541619017e-03},
|
||||
{-5.27880954316893e-03, 4.54359480225226e-05, 6.30804591626044e-05},
|
||||
{-6.30342541619017e-03, 6.30804591626044e-05, 5.36466441382942e-04}},
|
||||
{{ 3.39917474216349e+01, -1.56213579433191e-03, -4.01459014990225e-02},
|
||||
{-1.56213579433191e-03, 6.40415424897724e-05, 6.20076342427833e-05},
|
||||
{-4.01459014990225e-02, 6.20076342427833e-05, 3.51199070103063e-03}},
|
||||
{{ 1.34545062271428e+01, -7.94513610147144e-03, -5.34401019341728e-02},
|
||||
{-7.94513610147144e-03, 1.16511820098649e-04, 4.66063702069293e-05},
|
||||
{-5.34401019341728e-02, 4.66063702069293e-05, 2.72354323774163e-03}},
|
||||
{{ 1.08557844314806e+02, -1.54885805673668e-02, -1.88029692674851e-02},
|
||||
{-1.54885805673668e-02, 1.16404042786406e-04, 6.45579292702802e-06},
|
||||
{-1.88029692674851e-02, 6.45579292702802e-06, 4.32330478391416e-04}},
|
||||
{{ 8.22940066541450e+01, -1.15903110231303e-02, -4.92166764865343e-02},
|
||||
{-1.15903110231303e-02, 7.42510742165261e-05, 3.73007314191290e-06},
|
||||
{-4.92166764865343e-02, 3.73007314191290e-06, 3.64005221593244e-03}},
|
||||
{{ 2.31133605685660e+00, -7.83261568950254e-04, 7.45744012346313e-04},
|
||||
{-7.83261568950254e-04, 1.29460648214142e-05, -2.22774455093730e-06},
|
||||
{ 7.45744012346313e-04, -2.22774455093730e-06, 1.05117294093010e-04}},
|
||||
{{ 3.78767849189611e+02, 1.57759761011568e-03, -2.08551217988774e-02},
|
||||
{ 1.57759761011568e-03, 4.76066236886865e-05, -2.33977412299324e-05},
|
||||
{-2.08551217988774e-02, -2.33977412299324e-05, 5.24261005371196e-04}},
|
||||
{{ 6.98580096506135e-01, -5.13850255217378e-04, -4.01124551717056e-04},
|
||||
{-5.13850255217378e-04, 1.40501021984840e-06, -2.09496928716569e-06},
|
||||
{-4.01124551717056e-04, -2.09496928716569e-06, 2.82879357740037e-04}},
|
||||
{{ 2.62770945162399e+00, -2.31825753241430e-03, -5.30447217466318e-03},
|
||||
{-2.31825753241430e-03, 4.59108572227649e-05, 7.67631886355405e-05},
|
||||
{-5.30447217466318e-03, 7.67631886355405e-05, 2.28521601674098e-03}},
|
||||
{{ 1.89940391362152e+02, -4.23280856852379e-03, -2.70608873541399e-02},
|
||||
{-4.23280856852379e-03, 6.77547582742563e-05, 2.69154203800467e-05},
|
||||
{-2.70608873541399e-02, 2.69154203800467e-05, 3.88574543373470e-03}}};
|
||||
|
||||
static const double kVoiceGmmMean[kVoiceGmmNumMixtures][kVoiceGmmDim] = {
|
||||
{-2.15020241646536e+00, 4.97079062999877e+02, 4.77078119504505e+02},
|
||||
{-8.92097680029190e-01, 5.92064964199921e+02, 1.81045145941059e+02},
|
||||
{-1.29435784144398e+00, 4.98450293410611e+02, 1.71991263804064e+02},
|
||||
{-1.03925228397884e+00, 4.99511274321571e+02, 1.05838336539105e+02},
|
||||
{-1.29229047206129e+00, 4.15026762566707e+02, 1.12861119017125e+02},
|
||||
{-7.88748114599810e-01, 4.48739336688113e+02, 1.89784216956337e+02},
|
||||
{-8.77777402332642e-01, 4.86620285054533e+02, 1.13477708016491e+02},
|
||||
{-2.06465957063057e+00, 6.33385049870607e+02, 2.32758546796149e+02},
|
||||
{-6.98893789231685e-01, 5.93622051503385e+02, 1.92536982473203e+02},
|
||||
{-2.55901217508894e+00, 1.55914919756205e+03, 1.39769980835570e+02},
|
||||
{-1.92070024165837e+00, 4.87983940444185e+02, 1.02745468128289e+02},
|
||||
{-7.29187507662854e-01, 5.22717685022855e+02, 1.16377942283991e+02}};
|
||||
|
||||
static const double kVoiceGmmWeights[kVoiceGmmNumMixtures] = {
|
||||
-1.39789694361035e+01, -1.19527720202104e+01, -1.32396317929055e+01,
|
||||
-1.09436815209238e+01, -1.13440027478149e+01, -1.12200721834504e+01,
|
||||
-1.02537324043693e+01, -1.60789861938302e+01, -1.03394494048344e+01,
|
||||
-1.83207938586818e+01, -1.31186044948288e+01, -9.52479998673554e+00};
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_VOICE_GMM_TABLES_H_
|
@ -9,6 +9,7 @@
|
||||
{
|
||||
'variables': {
|
||||
'audio_processing_dependencies': [
|
||||
'<(DEPTH)/webrtc/modules/modules.gyp:iSAC',
|
||||
'<(webrtc_root)/base/base.gyp:rtc_base_approved',
|
||||
'<(webrtc_root)/common_audio/common_audio.gyp:common_audio',
|
||||
'<(webrtc_root)/system_wrappers/source/system_wrappers.gyp:system_wrappers',
|
||||
@ -33,25 +34,52 @@
|
||||
'<@(audio_processing_dependencies)',
|
||||
],
|
||||
'sources': [
|
||||
'aec/include/echo_cancellation.h',
|
||||
'aec/aec_core.c',
|
||||
'aec/aec_core.h',
|
||||
'aec/aec_core_internal.h',
|
||||
'aec/aec_rdft.c',
|
||||
'aec/aec_rdft.h',
|
||||
'aec/aec_resampler.c',
|
||||
'aec/aec_resampler.h',
|
||||
'aec/echo_cancellation.c',
|
||||
'aec/echo_cancellation_internal.h',
|
||||
'aec/aec_core.h',
|
||||
'aec/aec_core.c',
|
||||
'aec/aec_core_internal.h',
|
||||
'aec/aec_rdft.h',
|
||||
'aec/aec_rdft.c',
|
||||
'aec/aec_resampler.h',
|
||||
'aec/aec_resampler.c',
|
||||
'aecm/include/echo_control_mobile.h',
|
||||
'aecm/echo_control_mobile.c',
|
||||
'aec/include/echo_cancellation.h',
|
||||
'aecm/aecm_core.c',
|
||||
'aecm/aecm_core.h',
|
||||
'agc/include/gain_control.h',
|
||||
'aecm/echo_control_mobile.c',
|
||||
'aecm/include/echo_control_mobile.h',
|
||||
'agc/agc.cc',
|
||||
'agc/agc.h',
|
||||
'agc/agc_audio_proc.cc',
|
||||
'agc/agc_audio_proc.h',
|
||||
'agc/agc_audio_proc_internal.h',
|
||||
'agc/agc_manager_direct.cc',
|
||||
'agc/agc_manager_direct.h',
|
||||
'agc/analog_agc.c',
|
||||
'agc/analog_agc.h',
|
||||
'agc/circular_buffer.cc',
|
||||
'agc/circular_buffer.h',
|
||||
'agc/common.h',
|
||||
'agc/digital_agc.c',
|
||||
'agc/digital_agc.h',
|
||||
'agc/gain_map_internal.h',
|
||||
'agc/gmm.cc',
|
||||
'agc/gmm.h',
|
||||
'agc/histogram.cc',
|
||||
'agc/histogram.h',
|
||||
'agc/include/gain_control.h',
|
||||
'agc/noise_gmm_tables.h',
|
||||
'agc/pitch_based_vad.cc',
|
||||
'agc/pitch_based_vad.h',
|
||||
'agc/pitch_internal.cc',
|
||||
'agc/pitch_internal.h',
|
||||
'agc/pole_zero_filter.cc',
|
||||
'agc/pole_zero_filter.h',
|
||||
'agc/standalone_vad.cc',
|
||||
'agc/standalone_vad.h',
|
||||
'agc/utility.cc',
|
||||
'agc/utility.h',
|
||||
'agc/voice_gmm_tables.h',
|
||||
'audio_buffer.cc',
|
||||
'audio_buffer.h',
|
||||
'audio_processing_impl.cc',
|
||||
@ -74,10 +102,23 @@
|
||||
'noise_suppression_impl.h',
|
||||
'processing_component.cc',
|
||||
'processing_component.h',
|
||||
'splitting_filter.cc',
|
||||
'splitting_filter.h',
|
||||
'rms_level.cc',
|
||||
'rms_level.h',
|
||||
'splitting_filter.cc',
|
||||
'splitting_filter.h',
|
||||
'transient/common.h',
|
||||
'transient/daubechies_8_wavelet_coeffs.h',
|
||||
'transient/dyadic_decimator.h',
|
||||
'transient/moving_moments.cc',
|
||||
'transient/moving_moments.h',
|
||||
'transient/transient_detector.cc',
|
||||
'transient/transient_detector.h',
|
||||
'transient/transient_suppressor.cc',
|
||||
'transient/transient_suppressor.h',
|
||||
'transient/wpd_node.cc',
|
||||
'transient/wpd_node.h',
|
||||
'transient/wpd_tree.cc',
|
||||
'transient/wpd_tree.h',
|
||||
'typing_detection.cc',
|
||||
'typing_detection.h',
|
||||
'utility/delay_estimator.c',
|
||||
|
@ -15,6 +15,8 @@
|
||||
#include "webrtc/base/platform_file.h"
|
||||
#include "webrtc/common_audio/include/audio_util.h"
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
#include "webrtc/modules/audio_processing/agc/agc_manager_direct.h"
|
||||
#include "webrtc/modules/audio_processing/transient/transient_suppressor.h"
|
||||
#include "webrtc/modules/audio_processing/audio_buffer.h"
|
||||
#include "webrtc/modules/audio_processing/channel_buffer.h"
|
||||
#include "webrtc/modules/audio_processing/common.h"
|
||||
@ -54,6 +56,85 @@ namespace webrtc {
|
||||
// Throughout webrtc, it's assumed that success is represented by zero.
|
||||
COMPILE_ASSERT(AudioProcessing::kNoError == 0, no_error_must_be_zero);
|
||||
|
||||
// This class has two main functionalities:
|
||||
//
|
||||
// 1) It is returned instead of the real GainControl after the new AGC has been
|
||||
// enabled in order to prevent an outside user from overriding compression
|
||||
// settings. It doesn't do anything in its implementation, except for
|
||||
// delegating the const methods and Enable calls to the real GainControl, so
|
||||
// AGC can still be disabled.
|
||||
//
|
||||
// 2) It is injected into AgcManagerDirect and implements volume callbacks for
|
||||
// getting and setting the volume level. It just caches this value to be used
|
||||
// in VoiceEngine later.
|
||||
class GainControlForNewAgc : public GainControl, public VolumeCallbacks {
|
||||
public:
|
||||
explicit GainControlForNewAgc(GainControlImpl* gain_control)
|
||||
: real_gain_control_(gain_control),
|
||||
volume_(0) {
|
||||
}
|
||||
|
||||
// GainControl implementation.
|
||||
virtual int Enable(bool enable) OVERRIDE {
|
||||
return real_gain_control_->Enable(enable);
|
||||
}
|
||||
virtual bool is_enabled() const OVERRIDE {
|
||||
return real_gain_control_->is_enabled();
|
||||
}
|
||||
virtual int set_stream_analog_level(int level) OVERRIDE {
|
||||
volume_ = level;
|
||||
return AudioProcessing::kNoError;
|
||||
}
|
||||
virtual int stream_analog_level() OVERRIDE {
|
||||
return volume_;
|
||||
}
|
||||
virtual int set_mode(Mode mode) OVERRIDE { return AudioProcessing::kNoError; }
|
||||
virtual Mode mode() const OVERRIDE { return GainControl::kAdaptiveAnalog; }
|
||||
virtual int set_target_level_dbfs(int level) OVERRIDE {
|
||||
return AudioProcessing::kNoError;
|
||||
}
|
||||
virtual int target_level_dbfs() const OVERRIDE {
|
||||
return real_gain_control_->target_level_dbfs();
|
||||
}
|
||||
virtual int set_compression_gain_db(int gain) OVERRIDE {
|
||||
return AudioProcessing::kNoError;
|
||||
}
|
||||
virtual int compression_gain_db() const OVERRIDE {
|
||||
return real_gain_control_->compression_gain_db();
|
||||
}
|
||||
virtual int enable_limiter(bool enable) OVERRIDE {
|
||||
return AudioProcessing::kNoError;
|
||||
}
|
||||
virtual bool is_limiter_enabled() const OVERRIDE {
|
||||
return real_gain_control_->is_limiter_enabled();
|
||||
}
|
||||
virtual int set_analog_level_limits(int minimum,
|
||||
int maximum) OVERRIDE {
|
||||
return AudioProcessing::kNoError;
|
||||
}
|
||||
virtual int analog_level_minimum() const OVERRIDE {
|
||||
return real_gain_control_->analog_level_minimum();
|
||||
}
|
||||
virtual int analog_level_maximum() const OVERRIDE {
|
||||
return real_gain_control_->analog_level_maximum();
|
||||
}
|
||||
virtual bool stream_is_saturated() const OVERRIDE {
|
||||
return real_gain_control_->stream_is_saturated();
|
||||
}
|
||||
|
||||
// VolumeCallbacks implementation.
|
||||
virtual void SetMicVolume(int volume) OVERRIDE {
|
||||
volume_ = volume;
|
||||
}
|
||||
virtual int GetMicVolume() OVERRIDE {
|
||||
return volume_;
|
||||
}
|
||||
|
||||
private:
|
||||
GainControl* real_gain_control_;
|
||||
int volume_;
|
||||
};
|
||||
|
||||
AudioProcessing* AudioProcessing::Create(int id) {
|
||||
return Create();
|
||||
}
|
||||
@ -96,7 +177,13 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config)
|
||||
delay_offset_ms_(0),
|
||||
was_stream_delay_set_(false),
|
||||
output_will_be_muted_(false),
|
||||
key_pressed_(false) {
|
||||
key_pressed_(false),
|
||||
#if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS)
|
||||
use_new_agc_(false),
|
||||
#else
|
||||
use_new_agc_(config.Get<ExperimentalAgc>().enabled),
|
||||
#endif
|
||||
transient_suppressor_enabled_(config.Get<ExperimentalNs>().enabled) {
|
||||
echo_cancellation_ = new EchoCancellationImpl(this, crit_);
|
||||
component_list_.push_back(echo_cancellation_);
|
||||
|
||||
@ -118,12 +205,18 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config)
|
||||
voice_detection_ = new VoiceDetectionImpl(this, crit_);
|
||||
component_list_.push_back(voice_detection_);
|
||||
|
||||
gain_control_for_new_agc_.reset(new GainControlForNewAgc(gain_control_));
|
||||
|
||||
SetExtraOptions(config);
|
||||
}
|
||||
|
||||
AudioProcessingImpl::~AudioProcessingImpl() {
|
||||
{
|
||||
CriticalSectionScoped crit_scoped(crit_);
|
||||
// Depends on gain_control_ and gain_control_for_new_agc_.
|
||||
agc_manager_.reset();
|
||||
// Depends on gain_control_.
|
||||
gain_control_for_new_agc_.reset();
|
||||
while (!component_list_.empty()) {
|
||||
ProcessingComponent* component = component_list_.front();
|
||||
component->Destroy();
|
||||
@ -192,6 +285,16 @@ int AudioProcessingImpl::InitializeLocked() {
|
||||
}
|
||||
}
|
||||
|
||||
int err = InitializeExperimentalAgc();
|
||||
if (err != kNoError) {
|
||||
return err;
|
||||
}
|
||||
|
||||
err = InitializeTransient();
|
||||
if (err != kNoError) {
|
||||
return err;
|
||||
}
|
||||
|
||||
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
|
||||
if (debug_file_->Open()) {
|
||||
int err = WriteInitMessage();
|
||||
@ -303,6 +406,11 @@ void AudioProcessingImpl::SetExtraOptions(const Config& config) {
|
||||
std::list<ProcessingComponent*>::iterator it;
|
||||
for (it = component_list_.begin(); it != component_list_.end(); ++it)
|
||||
(*it)->SetExtraOptions(config);
|
||||
|
||||
if (transient_suppressor_enabled_ != config.Get<ExperimentalNs>().enabled) {
|
||||
transient_suppressor_enabled_ = config.Get<ExperimentalNs>().enabled;
|
||||
InitializeTransient();
|
||||
}
|
||||
}
|
||||
|
||||
int AudioProcessingImpl::input_sample_rate_hz() const {
|
||||
@ -337,6 +445,10 @@ int AudioProcessingImpl::num_output_channels() const {
|
||||
|
||||
void AudioProcessingImpl::set_output_will_be_muted(bool muted) {
|
||||
output_will_be_muted_ = muted;
|
||||
CriticalSectionScoped lock(crit_);
|
||||
if (agc_manager_.get()) {
|
||||
agc_manager_->SetCaptureMuted(output_will_be_muted_);
|
||||
}
|
||||
}
|
||||
|
||||
bool AudioProcessingImpl::output_will_be_muted() const {
|
||||
@ -470,6 +582,12 @@ int AudioProcessingImpl::ProcessStreamLocked() {
|
||||
#endif
|
||||
|
||||
AudioBuffer* ca = capture_audio_.get(); // For brevity.
|
||||
if (use_new_agc_ && gain_control_->is_enabled()) {
|
||||
agc_manager_->AnalyzePreProcess(ca->data(0),
|
||||
ca->num_channels(),
|
||||
fwd_proc_format_.samples_per_channel());
|
||||
}
|
||||
|
||||
bool data_processed = is_data_processed();
|
||||
if (analysis_needed(data_processed)) {
|
||||
ca->SplitIntoFrequencyBands();
|
||||
@ -486,12 +604,35 @@ int AudioProcessingImpl::ProcessStreamLocked() {
|
||||
RETURN_ON_ERR(noise_suppression_->ProcessCaptureAudio(ca));
|
||||
RETURN_ON_ERR(echo_control_mobile_->ProcessCaptureAudio(ca));
|
||||
RETURN_ON_ERR(voice_detection_->ProcessCaptureAudio(ca));
|
||||
|
||||
if (use_new_agc_ && gain_control_->is_enabled()) {
|
||||
agc_manager_->Process(ca->split_bands_const(0)[kBand0To8kHz],
|
||||
ca->samples_per_split_channel(),
|
||||
split_rate_);
|
||||
}
|
||||
RETURN_ON_ERR(gain_control_->ProcessCaptureAudio(ca));
|
||||
|
||||
if (synthesis_needed(data_processed)) {
|
||||
ca->MergeFrequencyBands();
|
||||
}
|
||||
|
||||
// TODO(aluebs): Investigate if the transient suppression placement should be
|
||||
// before or after the AGC.
|
||||
if (transient_suppressor_enabled_) {
|
||||
float voice_probability =
|
||||
agc_manager_.get() ? agc_manager_->voice_probability() : 1.f;
|
||||
|
||||
transient_suppressor_->Suppress(ca->data_f(0),
|
||||
ca->samples_per_channel(),
|
||||
ca->num_channels(),
|
||||
ca->split_bands_const_f(0)[kBand0To8kHz],
|
||||
ca->samples_per_split_channel(),
|
||||
ca->keyboard_data(),
|
||||
ca->samples_per_keyboard_channel(),
|
||||
voice_probability,
|
||||
key_pressed_);
|
||||
}
|
||||
|
||||
// The level estimator operates on the recombined data.
|
||||
RETURN_ON_ERR(level_estimator_->ProcessStream(ca));
|
||||
|
||||
@ -586,7 +727,9 @@ int AudioProcessingImpl::AnalyzeReverseStreamLocked() {
|
||||
|
||||
RETURN_ON_ERR(echo_cancellation_->ProcessRenderAudio(ra));
|
||||
RETURN_ON_ERR(echo_control_mobile_->ProcessRenderAudio(ra));
|
||||
RETURN_ON_ERR(gain_control_->ProcessRenderAudio(ra));
|
||||
if (!use_new_agc_) {
|
||||
RETURN_ON_ERR(gain_control_->ProcessRenderAudio(ra));
|
||||
}
|
||||
|
||||
return kNoError;
|
||||
}
|
||||
@ -728,6 +871,9 @@ EchoControlMobile* AudioProcessingImpl::echo_control_mobile() const {
|
||||
}
|
||||
|
||||
GainControl* AudioProcessingImpl::gain_control() const {
|
||||
if (use_new_agc_) {
|
||||
return gain_control_for_new_agc_.get();
|
||||
}
|
||||
return gain_control_;
|
||||
}
|
||||
|
||||
@ -775,7 +921,7 @@ bool AudioProcessingImpl::is_data_processed() const {
|
||||
bool AudioProcessingImpl::output_copy_needed(bool is_data_processed) const {
|
||||
// Check if we've upmixed or downmixed the audio.
|
||||
return ((fwd_out_format_.num_channels() != fwd_in_format_.num_channels()) ||
|
||||
is_data_processed);
|
||||
is_data_processed || transient_suppressor_enabled_);
|
||||
}
|
||||
|
||||
bool AudioProcessingImpl::synthesis_needed(bool is_data_processed) const {
|
||||
@ -784,7 +930,8 @@ bool AudioProcessingImpl::synthesis_needed(bool is_data_processed) const {
|
||||
}
|
||||
|
||||
bool AudioProcessingImpl::analysis_needed(bool is_data_processed) const {
|
||||
if (!is_data_processed && !voice_detection_->is_enabled()) {
|
||||
if (!is_data_processed && !voice_detection_->is_enabled() &&
|
||||
!transient_suppressor_enabled_) {
|
||||
// Only level_estimator_ is enabled.
|
||||
return false;
|
||||
} else if (fwd_proc_format_.rate() == kSampleRate32kHz ||
|
||||
@ -795,6 +942,30 @@ bool AudioProcessingImpl::analysis_needed(bool is_data_processed) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
int AudioProcessingImpl::InitializeExperimentalAgc() {
|
||||
if (use_new_agc_) {
|
||||
if (!agc_manager_.get()) {
|
||||
agc_manager_.reset(
|
||||
new AgcManagerDirect(gain_control_, gain_control_for_new_agc_.get()));
|
||||
}
|
||||
agc_manager_->Initialize();
|
||||
agc_manager_->SetCaptureMuted(output_will_be_muted_);
|
||||
}
|
||||
return kNoError;
|
||||
}
|
||||
|
||||
int AudioProcessingImpl::InitializeTransient() {
|
||||
if (transient_suppressor_enabled_) {
|
||||
if (!transient_suppressor_.get()) {
|
||||
transient_suppressor_.reset(new TransientSuppressor());
|
||||
}
|
||||
transient_suppressor_->Initialize(fwd_proc_format_.rate(),
|
||||
split_rate_,
|
||||
fwd_out_format_.num_channels());
|
||||
}
|
||||
return kNoError;
|
||||
}
|
||||
|
||||
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
|
||||
int AudioProcessingImpl::WriteMessageToDebugFile() {
|
||||
int32_t size = event_msg_->ByteSize();
|
||||
|
@ -8,28 +8,32 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_PROCESSING_IMPL_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_PROCESSING_IMPL_H_
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_PROCESSING_IMPL_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_PROCESSING_IMPL_H_
|
||||
|
||||
#include "webrtc/modules/audio_processing/include/audio_processing.h"
|
||||
|
||||
#include <list>
|
||||
#include <string>
|
||||
|
||||
#include "webrtc/base/thread_annotations.h"
|
||||
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class AgcManagerDirect;
|
||||
class AudioBuffer;
|
||||
class CriticalSectionWrapper;
|
||||
class EchoCancellationImpl;
|
||||
class EchoControlMobileImpl;
|
||||
class FileWrapper;
|
||||
class GainControlImpl;
|
||||
class GainControlForNewAgc;
|
||||
class HighPassFilterImpl;
|
||||
class LevelEstimatorImpl;
|
||||
class NoiseSuppressionImpl;
|
||||
class ProcessingComponent;
|
||||
class TransientSuppressor;
|
||||
class VoiceDetectionImpl;
|
||||
|
||||
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
|
||||
@ -138,7 +142,7 @@ class AudioProcessingImpl : public AudioProcessing {
|
||||
|
||||
protected:
|
||||
// Overridden in a mock.
|
||||
virtual int InitializeLocked();
|
||||
virtual int InitializeLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_);
|
||||
|
||||
private:
|
||||
int InitializeLocked(int input_sample_rate_hz,
|
||||
@ -146,20 +150,24 @@ class AudioProcessingImpl : public AudioProcessing {
|
||||
int reverse_sample_rate_hz,
|
||||
int num_input_channels,
|
||||
int num_output_channels,
|
||||
int num_reverse_channels);
|
||||
int num_reverse_channels)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(crit_);
|
||||
int MaybeInitializeLocked(int input_sample_rate_hz,
|
||||
int output_sample_rate_hz,
|
||||
int reverse_sample_rate_hz,
|
||||
int num_input_channels,
|
||||
int num_output_channels,
|
||||
int num_reverse_channels);
|
||||
int ProcessStreamLocked();
|
||||
int AnalyzeReverseStreamLocked();
|
||||
int num_reverse_channels)
|
||||
EXCLUSIVE_LOCKS_REQUIRED(crit_);
|
||||
int ProcessStreamLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_);
|
||||
int AnalyzeReverseStreamLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_);
|
||||
|
||||
bool is_data_processed() const;
|
||||
bool output_copy_needed(bool is_data_processed) const;
|
||||
bool synthesis_needed(bool is_data_processed) const;
|
||||
bool analysis_needed(bool is_data_processed) const;
|
||||
int InitializeExperimentalAgc() EXCLUSIVE_LOCKS_REQUIRED(crit_);
|
||||
int InitializeTransient() EXCLUSIVE_LOCKS_REQUIRED(crit_);
|
||||
|
||||
EchoCancellationImpl* echo_cancellation_;
|
||||
EchoControlMobileImpl* echo_control_mobile_;
|
||||
@ -168,6 +176,7 @@ class AudioProcessingImpl : public AudioProcessing {
|
||||
LevelEstimatorImpl* level_estimator_;
|
||||
NoiseSuppressionImpl* noise_suppression_;
|
||||
VoiceDetectionImpl* voice_detection_;
|
||||
scoped_ptr<GainControlForNewAgc> gain_control_for_new_agc_;
|
||||
|
||||
std::list<ProcessingComponent*> component_list_;
|
||||
CriticalSectionWrapper* crit_;
|
||||
@ -199,8 +208,15 @@ class AudioProcessingImpl : public AudioProcessing {
|
||||
bool output_will_be_muted_;
|
||||
|
||||
bool key_pressed_;
|
||||
|
||||
// Only set through the constructor's Config parameter.
|
||||
const bool use_new_agc_;
|
||||
scoped_ptr<AgcManagerDirect> agc_manager_ GUARDED_BY(crit_);
|
||||
|
||||
bool transient_suppressor_enabled_;
|
||||
scoped_ptr<TransientSuppressor> transient_suppressor_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_PROCESSING_IMPL_H_
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_PROCESSING_IMPL_H_
|
||||
|
@ -27,7 +27,9 @@ class MockInitialize : public AudioProcessingImpl {
|
||||
}
|
||||
|
||||
MOCK_METHOD0(InitializeLocked, int());
|
||||
int RealInitializeLocked() { return AudioProcessingImpl::InitializeLocked(); }
|
||||
int RealInitializeLocked() NO_THREAD_SAFETY_ANALYSIS {
|
||||
return AudioProcessingImpl::InitializeLocked();
|
||||
}
|
||||
};
|
||||
|
||||
TEST(AudioProcessingImplTest, AudioParameterChangeTriggersInit) {
|
||||
|
@ -46,6 +46,33 @@
|
||||
],
|
||||
'sources': [ 'test/unpack.cc', ],
|
||||
},
|
||||
{
|
||||
'target_name': 'transient_suppression_test',
|
||||
'type': 'executable',
|
||||
'dependencies': [
|
||||
'<(DEPTH)/testing/gtest.gyp:gtest',
|
||||
'<(DEPTH)/third_party/gflags/gflags.gyp:gflags',
|
||||
'<(webrtc_root)/test/test.gyp:test_support',
|
||||
'<(webrtc_root)/modules/modules.gyp:audio_processing',
|
||||
],
|
||||
'sources': [
|
||||
'transient/transient_suppression_test.cc',
|
||||
'transient/file_utils.cc',
|
||||
'transient/file_utils.h',
|
||||
],
|
||||
}, # transient_suppression_test
|
||||
{
|
||||
'target_name': 'click_annotate',
|
||||
'type': 'executable',
|
||||
'dependencies': [
|
||||
'<(webrtc_root)/modules/modules.gyp:audio_processing',
|
||||
],
|
||||
'sources': [
|
||||
'transient/click_annotate.cc',
|
||||
'transient/file_utils.cc',
|
||||
'transient/file_utils.h',
|
||||
],
|
||||
}, # click_annotate
|
||||
],
|
||||
}],
|
||||
],
|
||||
|
114
webrtc/modules/audio_processing/transient/click_annotate.cc
Normal file
114
webrtc/modules/audio_processing/transient/click_annotate.cc
Normal file
@ -0,0 +1,114 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <cfloat>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <vector>
|
||||
|
||||
#include "webrtc/modules/audio_processing/transient/transient_detector.h"
|
||||
#include "webrtc/modules/audio_processing/transient/file_utils.h"
|
||||
#include "webrtc/system_wrappers/interface/file_wrapper.h"
|
||||
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
|
||||
|
||||
using webrtc::FileWrapper;
|
||||
using webrtc::TransientDetector;
|
||||
using webrtc::scoped_ptr;
|
||||
|
||||
// Application to generate a RTP timing file.
|
||||
// Opens the PCM file and divides the signal in frames.
|
||||
// Creates a send times array, one for each step.
|
||||
// Each block that contains a transient, has an infinite send time.
|
||||
// The resultant array is written to a DAT file
|
||||
// Returns -1 on error or |lost_packets| otherwise.
|
||||
int main(int argc, char* argv[]) {
|
||||
if (argc != 5) {
|
||||
printf("\n%s - Application to generate a RTP timing file.\n\n", argv[0]);
|
||||
printf("%s PCMfile DATfile chunkSize sampleRate\n\n", argv[0]);
|
||||
printf("Opens the PCMfile with sampleRate in Hertz.\n");
|
||||
printf("Creates a send times array, one for each chunkSize ");
|
||||
printf("milliseconds step.\n");
|
||||
printf("Each block that contains a transient, has an infinite send time. ");
|
||||
printf("The resultant array is written to a DATfile.\n\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
scoped_ptr<FileWrapper> pcm_file(FileWrapper::Create());
|
||||
pcm_file->OpenFile(argv[1], true, false, false);
|
||||
if (!pcm_file->Open()) {
|
||||
printf("\nThe %s could not be opened.\n\n", argv[1]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
scoped_ptr<FileWrapper> dat_file(FileWrapper::Create());
|
||||
dat_file->OpenFile(argv[2], false, false, false);
|
||||
if (!dat_file->Open()) {
|
||||
printf("\nThe %s could not be opened.\n\n", argv[2]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
int chunk_size_ms = atoi(argv[3]);
|
||||
if (chunk_size_ms <= 0) {
|
||||
printf("\nThe chunkSize must be a positive integer\n\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
int sample_rate_hz = atoi(argv[4]);
|
||||
if (sample_rate_hz <= 0) {
|
||||
printf("\nThe sampleRate must be a positive integer\n\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
TransientDetector detector(sample_rate_hz);
|
||||
int lost_packets = 0;
|
||||
size_t audio_buffer_length = chunk_size_ms * sample_rate_hz / 1000;
|
||||
scoped_ptr<float[]> audio_buffer(new float[audio_buffer_length]);
|
||||
std::vector<float> send_times;
|
||||
|
||||
// Read first buffer from the PCM test file.
|
||||
size_t file_samples_read = ReadInt16FromFileToFloatBuffer(
|
||||
pcm_file.get(),
|
||||
audio_buffer_length,
|
||||
audio_buffer.get());
|
||||
for (int time = 0; file_samples_read > 0; time += chunk_size_ms) {
|
||||
// Pad the rest of the buffer with zeros.
|
||||
for (size_t i = file_samples_read; i < audio_buffer_length; ++i) {
|
||||
audio_buffer[i] = 0.0;
|
||||
}
|
||||
float value =
|
||||
detector.Detect(audio_buffer.get(), audio_buffer_length, NULL, 0);
|
||||
if (value < 0.5f) {
|
||||
value = time;
|
||||
} else {
|
||||
value = FLT_MAX;
|
||||
++lost_packets;
|
||||
}
|
||||
send_times.push_back(value);
|
||||
|
||||
// Read next buffer from the PCM test file.
|
||||
file_samples_read = ReadInt16FromFileToFloatBuffer(pcm_file.get(),
|
||||
audio_buffer_length,
|
||||
audio_buffer.get());
|
||||
}
|
||||
|
||||
size_t floats_written = WriteFloatBufferToFile(dat_file.get(),
|
||||
send_times.size(),
|
||||
&send_times[0]);
|
||||
|
||||
if (floats_written == 0) {
|
||||
printf("\nThe send times could not be written to DAT file\n\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
pcm_file->CloseFile();
|
||||
dat_file->CloseFile();
|
||||
|
||||
return lost_packets;
|
||||
}
|
27
webrtc/modules/audio_processing/transient/common.h
Normal file
27
webrtc/modules/audio_processing/transient/common.h
Normal file
@ -0,0 +1,27 @@
|
||||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_COMMON_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_COMMON_H_
|
||||
namespace webrtc {
|
||||
namespace ts {
|
||||
|
||||
static const float kPi = 3.14159265358979323846f;
|
||||
static const int kChunkSizeMs = 10;
|
||||
enum {
|
||||
kSampleRate8kHz = 8000,
|
||||
kSampleRate16kHz = 16000,
|
||||
kSampleRate32kHz = 32000,
|
||||
kSampleRate48kHz = 48000
|
||||
};
|
||||
|
||||
} // namespace ts
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_COMMON_H_
|
@ -0,0 +1,63 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
// This header file defines the decomposition filter coefficients of the
|
||||
// Daubechies 8 wavelet.
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DAUBECHIES_8_WAVELET_COEFFS_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DAUBECHIES_8_WAVELET_COEFFS_H_
|
||||
|
||||
// Decomposition coefficients Daubechies 8.
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
const int kDaubechies8CoefficientsLength = 16;
|
||||
|
||||
const float kDaubechies8HighPassCoefficients[kDaubechies8CoefficientsLength]
|
||||
= {
|
||||
-5.44158422430816093862e-02f,
|
||||
3.12871590914465924627e-01f,
|
||||
-6.75630736298012846142e-01f,
|
||||
5.85354683654869090148e-01f,
|
||||
1.58291052560238926228e-02f,
|
||||
-2.84015542962428091389e-01f,
|
||||
-4.72484573997972536787e-04f,
|
||||
1.28747426620186011803e-01f,
|
||||
1.73693010020221083600e-02f,
|
||||
-4.40882539310647192377e-02f,
|
||||
-1.39810279170155156436e-02f,
|
||||
8.74609404701565465445e-03f,
|
||||
4.87035299301066034600e-03f,
|
||||
-3.91740372995977108837e-04f,
|
||||
-6.75449405998556772109e-04f,
|
||||
-1.17476784002281916305e-04f
|
||||
};
|
||||
|
||||
const float kDaubechies8LowPassCoefficients[kDaubechies8CoefficientsLength] = {
|
||||
-1.17476784002281916305e-04f,
|
||||
6.75449405998556772109e-04f,
|
||||
-3.91740372995977108837e-04f,
|
||||
-4.87035299301066034600e-03f,
|
||||
8.74609404701565465445e-03f,
|
||||
1.39810279170155156436e-02f,
|
||||
-4.40882539310647192377e-02f,
|
||||
-1.73693010020221083600e-02f,
|
||||
1.28747426620186011803e-01f,
|
||||
4.72484573997972536787e-04f,
|
||||
-2.84015542962428091389e-01f,
|
||||
-1.58291052560238926228e-02f,
|
||||
5.85354683654869090148e-01f,
|
||||
6.75630736298012846142e-01f,
|
||||
3.12871590914465924627e-01f,
|
||||
5.44158422430816093862e-02f
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DAUBECHIES_8_WAVELET_COEFFS_H_
|
70
webrtc/modules/audio_processing/transient/dyadic_decimator.h
Normal file
70
webrtc/modules/audio_processing/transient/dyadic_decimator.h
Normal file
@ -0,0 +1,70 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DYADIC_DECIMATOR_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DYADIC_DECIMATOR_H_
|
||||
|
||||
#include <cstdlib>
|
||||
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
// Provides a set of static methods to perform dyadic decimations.
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Returns the number of samples that a dyadic decimation of |in_length| input
// samples produces, i.e. the minimum length of the output buffer.
// |odd_sequence| selects which members are kept: false keeps the even-indexed
// members (0, 2, 4, ...), true keeps the odd-indexed members (1, 3, 5, ...).
// This function has no error condition; a zero |in_length| yields 0.
inline size_t GetOutLengthToDyadicDecimate(size_t in_length,
                                           bool odd_sequence) {
  // An odd-length input has one more even-indexed member than odd-indexed
  // members, so keeping the even members yields one extra sample.
  const bool has_extra_even_member = (in_length % 2 == 1) && !odd_sequence;
  return in_length / 2 + (has_extra_even_member ? 1 : 0);
}
|
||||
|
||||
// Performs a dyadic decimation: removes every odd/even member of a sequence
|
||||
// halving its overall length.
|
||||
// Arguments:
|
||||
// in: array of |in_length|.
|
||||
// odd_sequence: If false, the odd members will be removed (1, 3, 5, ...);
|
||||
// if true, the even members will be removed (0, 2, 4, ...).
|
||||
// out: array of |out_length|. |out_length| must be large enough to
|
||||
// hold the decimated output. The necessary length can be provided by
|
||||
// GetOutLengthToDyadicDecimate().
|
||||
// Must be previously allocated.
|
||||
// Returns the number of output samples, -1 on error.
|
||||
template<typename T>
|
||||
static size_t DyadicDecimate(const T* in,
|
||||
size_t in_length,
|
||||
bool odd_sequence,
|
||||
T* out,
|
||||
size_t out_length) {
|
||||
size_t half_length = GetOutLengthToDyadicDecimate(in_length, odd_sequence);
|
||||
|
||||
if (!in || !out || in_length <= 0 || out_length < half_length) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t output_samples = 0;
|
||||
size_t index_adjustment = odd_sequence ? 1 : 0;
|
||||
for (output_samples = 0; output_samples < half_length; ++output_samples) {
|
||||
out[output_samples] = in[output_samples * 2 + index_adjustment];
|
||||
}
|
||||
|
||||
return output_samples;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DYADIC_DECIMATOR_H_
|
@ -0,0 +1,126 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/transient/dyadic_decimator.h"
|
||||
|
||||
#include "testing/gtest/include/gtest/gtest.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Buffer sizes shared by the tests below.
static const size_t kEvenBufferLength = 6;
static const size_t kOddBufferLength = 5;
static const size_t kOutBufferLength = 3;

// Ramp inputs: each sample holds its own index, so a decimated output shows
// directly which input positions were kept.
const int16_t test_buffer_even_len[] = {0, 1, 2, 3, 4, 5};
const int16_t test_buffer_odd_len[] = {0, 1, 2, 3, 4};

// Output buffer shared by the tests.
int16_t test_buffer_out[kOutBufferLength];
|
||||
|
||||
// Checks the output length for every combination of input-length parity and
// kept sequence.
TEST(DyadicDecimatorTest, GetOutLengthToDyadicDecimate) {
  const bool kEvenSequence = false;
  const bool kOddSequence = true;

  // Even input length: both sequences have the same size.
  EXPECT_EQ(3u, GetOutLengthToDyadicDecimate(6, kEvenSequence));
  EXPECT_EQ(3u, GetOutLengthToDyadicDecimate(6, kOddSequence));

  // Odd input length: the even sequence holds one more sample.
  EXPECT_EQ(3u, GetOutLengthToDyadicDecimate(5, kEvenSequence));
  EXPECT_EQ(2u, GetOutLengthToDyadicDecimate(5, kOddSequence));
}
|
||||
|
||||
|
||||
// Every invalid argument combination must make DyadicDecimate() return 0.
TEST(DyadicDecimatorTest, DyadicDecimateErrorValues) {
  size_t out_samples = 0;

  // NULL input.
  out_samples = DyadicDecimate(static_cast<int16_t*>(NULL),
                               kEvenBufferLength,
                               false,  // Even sequence.
                               test_buffer_out,
                               kOutBufferLength);
  EXPECT_EQ(0u, out_samples);

  // NULL output.
  out_samples = DyadicDecimate(test_buffer_even_len,
                               kEvenBufferLength,
                               false,  // Even sequence.
                               static_cast<int16_t*>(NULL),
                               kOutBufferLength);
  EXPECT_EQ(0u, out_samples);

  // Zero |in_length|.
  out_samples = DyadicDecimate(test_buffer_even_len,
                               0,
                               false,  // Even sequence.
                               test_buffer_out,
                               kOutBufferLength);
  EXPECT_EQ(0u, out_samples);

  // Less than required |out_length|.
  out_samples = DyadicDecimate(test_buffer_even_len,
                               kEvenBufferLength,
                               false,  // Even sequence.
                               test_buffer_out,
                               2);
  EXPECT_EQ(0u, out_samples);
}
|
||||
|
||||
// Even-length input, keeping the even-indexed members.
TEST(DyadicDecimatorTest, DyadicDecimateEvenLengthEvenSequence) {
  // A local output buffer pre-filled with a value that never occurs in the
  // input, so results left in the shared |test_buffer_out| by another test
  // cannot satisfy the expectations below.
  int16_t out[kOutBufferLength] = {-1, -1, -1};

  const size_t expected_out_samples =
      GetOutLengthToDyadicDecimate(kEvenBufferLength, false);

  const size_t out_samples = DyadicDecimate(test_buffer_even_len,
                                            kEvenBufferLength,
                                            false,  // Even sequence.
                                            out,
                                            kOutBufferLength);

  EXPECT_EQ(expected_out_samples, out_samples);

  EXPECT_EQ(0, out[0]);
  EXPECT_EQ(2, out[1]);
  EXPECT_EQ(4, out[2]);
}
|
||||
|
||||
// Even-length input, keeping the odd-indexed members.
TEST(DyadicDecimatorTest, DyadicDecimateEvenLengthOddSequence) {
  // A local output buffer pre-filled with a value that never occurs in the
  // input, so results left in the shared |test_buffer_out| by another test
  // cannot satisfy the expectations below.
  int16_t out[kOutBufferLength] = {-1, -1, -1};

  const size_t expected_out_samples =
      GetOutLengthToDyadicDecimate(kEvenBufferLength, true);

  const size_t out_samples = DyadicDecimate(test_buffer_even_len,
                                            kEvenBufferLength,
                                            true,  // Odd sequence.
                                            out,
                                            kOutBufferLength);

  EXPECT_EQ(expected_out_samples, out_samples);

  EXPECT_EQ(1, out[0]);
  EXPECT_EQ(3, out[1]);
  EXPECT_EQ(5, out[2]);
}
|
||||
|
||||
// Odd-length input, keeping the even-indexed members.
TEST(DyadicDecimatorTest, DyadicDecimateOddLengthEvenSequence) {
  // A local output buffer pre-filled with a value that never occurs in the
  // input, so results left in the shared |test_buffer_out| by another test
  // cannot satisfy the expectations below.
  int16_t out[kOutBufferLength] = {-1, -1, -1};

  const size_t expected_out_samples =
      GetOutLengthToDyadicDecimate(kOddBufferLength, false);

  const size_t out_samples = DyadicDecimate(test_buffer_odd_len,
                                            kOddBufferLength,
                                            false,  // Even sequence.
                                            out,
                                            kOutBufferLength);

  EXPECT_EQ(expected_out_samples, out_samples);

  EXPECT_EQ(0, out[0]);
  EXPECT_EQ(2, out[1]);
  EXPECT_EQ(4, out[2]);
}
|
||||
|
||||
// Odd-length input, keeping the odd-indexed members.
TEST(DyadicDecimatorTest, DyadicDecimateOddLengthOddSequence) {
  // A local output buffer pre-filled with a value that never occurs in the
  // input, so results left in the shared |test_buffer_out| by another test
  // cannot satisfy the expectations below.
  int16_t out[kOutBufferLength] = {-1, -1, -1};

  const size_t expected_out_samples =
      GetOutLengthToDyadicDecimate(kOddBufferLength, true);

  const size_t out_samples = DyadicDecimate(test_buffer_odd_len,
                                            kOddBufferLength,
                                            true,  // Odd sequence.
                                            out,
                                            kOutBufferLength);

  EXPECT_EQ(expected_out_samples, out_samples);

  EXPECT_EQ(1, out[0]);
  EXPECT_EQ(3, out[1]);
  // Only two samples are produced; the third slot must be left untouched.
  EXPECT_EQ(-1, out[2]);
}
|
||||
|
||||
} // namespace webrtc
|
257
webrtc/modules/audio_processing/transient/file_utils.cc
Normal file
257
webrtc/modules/audio_processing/transient/file_utils.cc
Normal file
@ -0,0 +1,257 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/transient/file_utils.h"
|
||||
|
||||
#include "webrtc/system_wrappers/interface/file_wrapper.h"
|
||||
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Reassembles a float from the four bytes in |bytes|, taking bytes[0] as the
// least significant byte of the binary representation.
// Returns 0 on success, -1 if |bytes| or |out| is NULL.
int ConvertByteArrayToFloat(const uint8_t bytes[4], float* out) {
  if (!bytes || !out) {
    return -1;
  }

  // Byte i occupies bits [8 * i, 8 * i + 7] of the 32-bit pattern.
  uint32_t bits = 0;
  for (int i = 0; i < 4; ++i) {
    bits |= static_cast<uint32_t>(bytes[i]) << (8 * i);
  }

  *out = bit_cast<float>(bits);
  return 0;
}
|
||||
|
||||
// Reassembles a double from the eight bytes in |bytes|, taking bytes[0] as the
// least significant byte of the binary representation.
// Returns 0 on success, -1 if |bytes| or |out| is NULL.
int ConvertByteArrayToDouble(const uint8_t bytes[8], double* out) {
  if (!bytes || !out) {
    return -1;
  }

  // Byte i occupies bits [8 * i, 8 * i + 7] of the 64-bit pattern.
  uint64_t bits = 0;
  for (int i = 0; i < 8; ++i) {
    bits |= static_cast<uint64_t>(bytes[i]) << (8 * i);
  }

  *out = bit_cast<double>(bits);
  return 0;
}
|
||||
|
||||
int ConvertFloatToByteArray(float value, uint8_t out_bytes[4]) {
|
||||
if (!out_bytes) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
uint32_t binary_value = bit_cast<uint32_t>(value);
|
||||
for (size_t i = 0; i < 4; ++i) {
|
||||
out_bytes[i] = binary_value;
|
||||
binary_value >>= 8;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ConvertDoubleToByteArray(double value, uint8_t out_bytes[8]) {
|
||||
if (!out_bytes) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
uint64_t binary_value = bit_cast<uint64_t>(value);
|
||||
for (size_t i = 0; i < 8; ++i) {
|
||||
out_bytes[i] = binary_value;
|
||||
binary_value >>= 8;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Reads up to |length| 16-bit integers, stored low byte first, from |file|
// into |buffer|.
// Returns the number of integers stored in |buffer|. Returns 0 if |file| or
// |buffer| is NULL, |file| is not open, or |length| is 0. Reading stops at the
// first read that does not deliver a complete 2-byte value.
size_t ReadInt16BufferFromFile(FileWrapper* file,
                               size_t length,
                               int16_t* buffer) {
  if (!file || !file->Open() || !buffer || length == 0) {
    return 0;
  }

  const int kBytesPerValue = 2;
  uint8_t byte_array[kBytesPerValue];

  size_t int16s_read = 0;

  while (int16s_read < length) {
    // Keep the result signed so that a negative error return is not mistaken
    // for a large byte count.
    // NOTE(review): relies on FileWrapper::Read() returning int (-1 on
    // error) - confirm against file_wrapper.h.
    const int bytes_read = file->Read(byte_array, kBytesPerValue);
    if (bytes_read < kBytesPerValue) {
      break;
    }
    // Assemble the value unsigned, then reinterpret it as signed.
    const uint16_t bits =
        static_cast<uint16_t>((byte_array[1] << 8) | byte_array[0]);
    buffer[int16s_read] = static_cast<int16_t>(bits);
    ++int16s_read;
  }

  return int16s_read;
}
|
||||
|
||||
// Reads up to |length| 16-bit integers from |file| through
// ReadInt16BufferFromFile() and stores each one, converted to float, in
// |buffer|.
// Returns the number of values stored, or 0 if |file| or |buffer| is NULL,
// |file| is not open, or |length| is 0.
size_t ReadInt16FromFileToFloatBuffer(FileWrapper* file,
                                      size_t length,
                                      float* buffer) {
  const bool valid_arguments = file && file->Open() && buffer && length > 0;
  if (!valid_arguments) {
    return 0;
  }

  // Temporary storage for the raw integers before conversion.
  scoped_ptr<int16_t[]> samples(new int16_t[length]);
  const size_t samples_read =
      ReadInt16BufferFromFile(file, length, samples.get());

  for (size_t i = 0; i != samples_read; ++i) {
    buffer[i] = samples[i];
  }

  return samples_read;
}
|
||||
|
||||
// Reads up to |length| 16-bit integers from |file| through
// ReadInt16BufferFromFile() and stores each one, converted to double, in
// |buffer|.
// Returns the number of values stored, or 0 if |file| or |buffer| is NULL,
// |file| is not open, or |length| is 0.
size_t ReadInt16FromFileToDoubleBuffer(FileWrapper* file,
                                       size_t length,
                                       double* buffer) {
  const bool valid_arguments = file && file->Open() && buffer && length > 0;
  if (!valid_arguments) {
    return 0;
  }

  // Temporary storage for the raw integers before conversion.
  scoped_ptr<int16_t[]> samples(new int16_t[length]);
  const size_t samples_read =
      ReadInt16BufferFromFile(file, length, samples.get());

  for (size_t i = 0; i != samples_read; ++i) {
    buffer[i] = samples[i];
  }

  return samples_read;
}
|
||||
|
||||
// Reads up to |length| floats, each stored as 4 bytes with the least
// significant byte first, from |file| into |buffer|.
// Returns the number of floats stored in |buffer|. Returns 0 if |file| or
// |buffer| is NULL, |file| is not open, or |length| is 0. Reading stops at the
// first read that does not deliver a complete 4-byte value.
size_t ReadFloatBufferFromFile(FileWrapper* file,
                               size_t length,
                               float* buffer) {
  if (!file || !file->Open() || !buffer || length == 0) {
    return 0;
  }

  const int kBytesPerValue = 4;
  uint8_t byte_array[kBytesPerValue];

  size_t floats_read = 0;

  while (floats_read < length) {
    // Keep the result signed so that a negative error return is not mistaken
    // for a large byte count.
    // NOTE(review): relies on FileWrapper::Read() returning int (-1 on
    // error) - confirm against file_wrapper.h.
    const int bytes_read = file->Read(byte_array, kBytesPerValue);
    if (bytes_read < kBytesPerValue) {
      break;
    }
    ConvertByteArrayToFloat(byte_array, &buffer[floats_read]);
    ++floats_read;
  }

  return floats_read;
}
|
||||
|
||||
// Reads up to |length| doubles, each stored as 8 bytes with the least
// significant byte first, from |file| into |buffer|.
// Returns the number of doubles stored in |buffer|. Returns 0 if |file| or
// |buffer| is NULL, |file| is not open, or |length| is 0. Reading stops at the
// first read that does not deliver a complete 8-byte value.
size_t ReadDoubleBufferFromFile(FileWrapper* file,
                                size_t length,
                                double* buffer) {
  if (!file || !file->Open() || !buffer || length == 0) {
    return 0;
  }

  const int kBytesPerValue = 8;
  uint8_t byte_array[kBytesPerValue];

  size_t doubles_read = 0;

  while (doubles_read < length) {
    // Keep the result signed so that a negative error return is not mistaken
    // for a large byte count.
    // NOTE(review): relies on FileWrapper::Read() returning int (-1 on
    // error) - confirm against file_wrapper.h.
    const int bytes_read = file->Read(byte_array, kBytesPerValue);
    if (bytes_read < kBytesPerValue) {
      break;
    }
    ConvertByteArrayToDouble(byte_array, &buffer[doubles_read]);
    ++doubles_read;
  }

  return doubles_read;
}
|
||||
|
||||
// Writes |length| 16-bit integers from |buffer| to |file|, low byte first,
// and then flushes |file|.
// Returns the number of integers written. Returns 0 if |file| or |buffer| is
// NULL, |file| is not open, or |length| is 0. Writing stops at the first
// failed write, so the result may be smaller than |length|.
size_t WriteInt16BufferToFile(FileWrapper* file,
                              size_t length,
                              const int16_t* buffer) {
  if (!file || !file->Open() || !buffer || length == 0) {
    return 0;
  }

  const int kBytesPerValue = 2;
  uint8_t byte_array[kBytesPerValue];

  size_t int16s_written = 0;

  for (; int16s_written < length; ++int16s_written) {
    // Get byte representation.
    byte_array[0] = buffer[int16s_written] & 0xFF;
    byte_array[1] = (buffer[int16s_written] >> 8) & 0xFF;

    // Do not count values that did not reach the file.
    // NOTE(review): relies on FileWrapper::Write() returning true on
    // success - confirm against file_wrapper.h.
    if (!file->Write(byte_array, kBytesPerValue)) {
      break;
    }
  }

  file->Flush();

  return int16s_written;
}
|
||||
|
||||
// Writes |length| floats from |buffer| to |file|, each as 4 bytes with the
// least significant byte first, and then flushes |file|.
// Returns the number of floats written. Returns 0 if |file| or |buffer| is
// NULL, |file| is not open, or |length| is 0. Writing stops at the first
// failed write, so the result may be smaller than |length|.
size_t WriteFloatBufferToFile(FileWrapper* file,
                              size_t length,
                              const float* buffer) {
  if (!file || !file->Open() || !buffer || length == 0) {
    return 0;
  }

  const int kBytesPerValue = 4;
  uint8_t byte_array[kBytesPerValue];

  size_t floats_written = 0;

  for (; floats_written < length; ++floats_written) {
    // Get byte representation.
    ConvertFloatToByteArray(buffer[floats_written], byte_array);

    // Do not count values that did not reach the file.
    // NOTE(review): relies on FileWrapper::Write() returning true on
    // success - confirm against file_wrapper.h.
    if (!file->Write(byte_array, kBytesPerValue)) {
      break;
    }
  }

  file->Flush();

  return floats_written;
}
|
||||
|
||||
// Writes |length| doubles from |buffer| to |file|, each as 8 bytes with the
// least significant byte first, and then flushes |file|.
// Returns the number of doubles written. Returns 0 if |file| or |buffer| is
// NULL, |file| is not open, or |length| is 0. Writing stops at the first
// failed write, so the result may be smaller than |length|.
size_t WriteDoubleBufferToFile(FileWrapper* file,
                               size_t length,
                               const double* buffer) {
  if (!file || !file->Open() || !buffer || length == 0) {
    return 0;
  }

  const int kBytesPerValue = 8;
  uint8_t byte_array[kBytesPerValue];

  size_t doubles_written = 0;

  for (; doubles_written < length; ++doubles_written) {
    // Get byte representation.
    ConvertDoubleToByteArray(buffer[doubles_written], byte_array);

    // Do not count values that did not reach the file.
    // NOTE(review): relies on FileWrapper::Write() returning true on
    // success - confirm against file_wrapper.h.
    if (!file->Write(byte_array, kBytesPerValue)) {
      break;
    }
  }

  file->Flush();

  return doubles_written;
}
|
||||
|
||||
} // namespace webrtc
|
119
webrtc/modules/audio_processing/transient/file_utils.h
Normal file
119
webrtc/modules/audio_processing/transient/file_utils.h
Normal file
@ -0,0 +1,119 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_FILE_UTILS_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_FILE_UTILS_H_
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "webrtc/base/compile_assert.h"
|
||||
#include "webrtc/system_wrappers/interface/file_wrapper.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// This is a copy of the cast included in the Chromium codebase here:
|
||||
// http://cs.chromium.org/src/third_party/cld/base/casts.h
|
||||
template <class Dest, class Source>
|
||||
inline Dest bit_cast(const Source& source) {
|
||||
// A compile error here means your Dest and Source have different sizes.
|
||||
COMPILE_ASSERT(sizeof(Dest) == sizeof(Source),
|
||||
dest_and_source_have_different_sizes);
|
||||
|
||||
Dest dest;
|
||||
memcpy(&dest, &source, sizeof(dest));
|
||||
return dest;
|
||||
}
|
||||
|
||||
// Converts the byte array with binary float representation to float.
|
||||
// Bytes must be in little-endian order.
|
||||
// Returns 0 if correct, -1 on error.
|
||||
int ConvertByteArrayToFloat(const uint8_t bytes[4], float* out);
|
||||
|
||||
// Converts the byte array with binary double representation to double.
|
||||
// Bytes must be in little-endian order.
|
||||
// Returns 0 if correct, -1 on error.
|
||||
int ConvertByteArrayToDouble(const uint8_t bytes[8], double* out);
|
||||
|
||||
// Converts a float to a byte array with binary float representation.
|
||||
// Bytes will be in little-endian order.
|
||||
// Returns 0 if correct, -1 on error.
|
||||
int ConvertFloatToByteArray(float value, uint8_t out_bytes[4]);
|
||||
|
||||
// Converts a double to a byte array with binary double representation.
|
||||
// Bytes will be in little-endian order.
|
||||
// Returns 0 if correct, -1 on error.
|
||||
int ConvertDoubleToByteArray(double value, uint8_t out_bytes[8]);
|
||||
|
||||
// Reads |length| 16-bit integers from |file| to |buffer|.
|
||||
// |file| must be previously opened.
|
||||
// Returns the number of 16-bit integers read, or 0 on error.
|
||||
size_t ReadInt16BufferFromFile(FileWrapper* file,
|
||||
size_t length,
|
||||
int16_t* buffer);
|
||||
|
||||
// Reads |length| 16-bit integers from |file| and stores those values
|
||||
// (converting them) in |buffer|.
|
||||
// |file| must be previously opened.
|
||||
// Returns the number of 16-bit integers read, or 0 on error.
|
||||
size_t ReadInt16FromFileToFloatBuffer(FileWrapper* file,
|
||||
size_t length,
|
||||
float* buffer);
|
||||
|
||||
// Reads |length| 16-bit integers from |file| and stores those values
|
||||
// (converting them) in |buffer|.
|
||||
// |file| must be previously opened.
|
||||
// Returns the number of 16-bit integers read, or 0 on error.
|
||||
size_t ReadInt16FromFileToDoubleBuffer(FileWrapper* file,
|
||||
size_t length,
|
||||
double* buffer);
|
||||
|
||||
// Reads |length| floats in binary representation (4 bytes) from |file| to
|
||||
// |buffer|.
|
||||
// |file| must be previously opened.
|
||||
// Returns the number of floats read, or 0 on error.
|
||||
size_t ReadFloatBufferFromFile(FileWrapper* file, size_t length, float* buffer);
|
||||
|
||||
// Reads |length| doubles in binary representation (8 bytes) from |file| to
|
||||
// |buffer|.
|
||||
// |file| must be previously opened.
|
||||
// Returns the number of doubles read, or 0 on error.
|
||||
size_t ReadDoubleBufferFromFile(FileWrapper* file,
|
||||
size_t length,
|
||||
double* buffer);
|
||||
|
||||
// Writes |length| 16-bit integers from |buffer| in binary representation (2
|
||||
// bytes) to |file|. It flushes |file|, so after this call there are no
|
||||
// writings pending.
|
||||
// |file| must be previously opened.
|
||||
// Returns the number of 16-bit integers written, or 0 on error.
|
||||
size_t WriteInt16BufferToFile(FileWrapper* file,
|
||||
size_t length,
|
||||
const int16_t* buffer);
|
||||
|
||||
// Writes |length| floats from |buffer| in binary representation (4 bytes) to
|
||||
// |file|. It flushes |file|, so after this call there are no writings pending.
|
||||
// |file| must be previously opened.
|
||||
// Returns the number of floats written, or 0 on error.
|
||||
size_t WriteFloatBufferToFile(FileWrapper* file,
|
||||
size_t length,
|
||||
const float* buffer);
|
||||
|
||||
// Writes |length| doubles from |buffer| in binary representation (8 bytes) to
|
||||
// |file|. It flushes |file|, so after this call there are no writings pending.
|
||||
// |file| must be previously opened.
|
||||
// Returns the number of doubles written, or 0 on error.
|
||||
size_t WriteDoubleBufferToFile(FileWrapper* file,
|
||||
size_t length,
|
||||
const double* buffer);
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_FILE_UTILS_H_
|
484
webrtc/modules/audio_processing/transient/file_utils_unittest.cc
Normal file
484
webrtc/modules/audio_processing/transient/file_utils_unittest.cc
Normal file
@ -0,0 +1,484 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/transient/file_utils.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <string>
|
||||
|
||||
#include "testing/gtest/include/gtest/gtest.h"
|
||||
#include "webrtc/system_wrappers/interface/file_wrapper.h"
|
||||
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
|
||||
#include "webrtc/test/testsupport/fileutils.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Byte patterns, least significant byte first, that the tests pair with the
// float versions of the constants below.
static const uint8_t kPiBytesf[4] = {0xDB, 0x0F, 0x49, 0x40};
static const uint8_t kEBytesf[4] = {0x54, 0xF8, 0x2D, 0x40};
static const uint8_t kAvogadroBytesf[4] = {0x2F, 0x0C, 0xFF, 0x66};

// Byte patterns, least significant byte first, that the tests pair with the
// double versions of the constants below.
static const uint8_t kPiBytes[8] = {
    0x18, 0x2D, 0x44, 0x54, 0xFB, 0x21, 0x09, 0x40
};
static const uint8_t kEBytes[8] = {
    0x69, 0x57, 0x14, 0x8B, 0x0A, 0xBF, 0x05, 0x40
};
static const uint8_t kAvogadroBytes[8] = {
    0xF4, 0xBC, 0xA8, 0xDF, 0x85, 0xE1, 0xDF, 0x44
};

// Reference values.
static const double kPi = 3.14159265358979323846;
static const double kE = 2.71828182845904523536;
static const double kAvogadro = 602214100000000000000000.0;
|
||||
|
||||
// Fixture that resolves the paths of the binary resource files read by the
// tests.
class TransientFileUtilsTest : public ::testing::Test {
 protected:
  TransientFileUtilsTest()
      : kTestFileName(
            test::ResourcePath("audio_processing/transient/double-utils",
                               "dat")),
        kTestFileNamef(
            test::ResourcePath("audio_processing/transient/float-utils",
                               "dat")) {}

  // Binary file holding the double representation of Pi, E and Avogadro's
  // number, appended in that order.
  const std::string kTestFileName;

  // Binary file holding the float representation of Pi, E and Avogadro's
  // number, appended in that order.
  const std::string kTestFileNamef;
};
|
||||
|
||||
// Each reference byte pattern must decode to the matching constant.
TEST_F(TransientFileUtilsTest, ConvertByteArrayToFloat) {
  float decoded = 0.0f;

  EXPECT_EQ(0, ConvertByteArrayToFloat(kPiBytesf, &decoded));
  EXPECT_FLOAT_EQ(kPi, decoded);

  EXPECT_EQ(0, ConvertByteArrayToFloat(kEBytesf, &decoded));
  EXPECT_FLOAT_EQ(kE, decoded);

  EXPECT_EQ(0, ConvertByteArrayToFloat(kAvogadroBytesf, &decoded));
  EXPECT_FLOAT_EQ(kAvogadro, decoded);
}
|
||||
|
||||
// Each reference byte pattern must decode to the matching constant.
TEST_F(TransientFileUtilsTest, ConvertByteArrayToDouble) {
  double decoded = 0.0;

  EXPECT_EQ(0, ConvertByteArrayToDouble(kPiBytes, &decoded));
  EXPECT_DOUBLE_EQ(kPi, decoded);

  EXPECT_EQ(0, ConvertByteArrayToDouble(kEBytes, &decoded));
  EXPECT_DOUBLE_EQ(kE, decoded);

  EXPECT_EQ(0, ConvertByteArrayToDouble(kAvogadroBytes, &decoded));
  EXPECT_DOUBLE_EQ(kAvogadro, decoded);
}
|
||||
|
||||
// Each constant must encode to the matching reference byte pattern.
TEST_F(TransientFileUtilsTest, ConvertFloatToByteArray) {
  uint8_t encoded[4];

  EXPECT_EQ(0, ConvertFloatToByteArray(kPi, encoded));
  EXPECT_EQ(0, memcmp(encoded, kPiBytesf, 4));

  EXPECT_EQ(0, ConvertFloatToByteArray(kE, encoded));
  EXPECT_EQ(0, memcmp(encoded, kEBytesf, 4));

  EXPECT_EQ(0, ConvertFloatToByteArray(kAvogadro, encoded));
  EXPECT_EQ(0, memcmp(encoded, kAvogadroBytesf, 4));
}
|
||||
|
||||
// Each constant must encode to the matching reference byte pattern.
TEST_F(TransientFileUtilsTest, ConvertDoubleToByteArray) {
  uint8_t encoded[8];

  EXPECT_EQ(0, ConvertDoubleToByteArray(kPi, encoded));
  EXPECT_EQ(0, memcmp(encoded, kPiBytes, 8));

  EXPECT_EQ(0, ConvertDoubleToByteArray(kE, encoded));
  EXPECT_EQ(0, memcmp(encoded, kEBytes, 8));

  EXPECT_EQ(0, ConvertDoubleToByteArray(kAvogadro, encoded));
  EXPECT_EQ(0, memcmp(encoded, kAvogadroBytes, 8));
}
|
||||
|
||||
// Reads the double resource file as raw 16-bit integers, first with a buffer
// that matches the file size and then with a larger one.
TEST_F(TransientFileUtilsTest, ReadInt16BufferFromFile) {
  scoped_ptr<FileWrapper> file(FileWrapper::Create());

  file->OpenFile(kTestFileName.c_str(),
                 true,    // Read only.
                 true,    // Loop.
                 false);  // No text.
  ASSERT_TRUE(file->Open()) << "File could not be opened:\n"
                            << kTestFileName.c_str();

  const size_t kBufferLength = 12;
  scoped_ptr<int16_t[]> buffer(new int16_t[kBufferLength]);

  EXPECT_EQ(kBufferLength, ReadInt16BufferFromFile(file.get(),
                                                   kBufferLength,
                                                   buffer.get()));
  EXPECT_EQ(11544, buffer[0]);
  EXPECT_EQ(22377, buffer[4]);
  EXPECT_EQ(16389, buffer[7]);
  EXPECT_EQ(17631, buffer[kBufferLength - 1]);

  file->Rewind();

  // The next check covers the case where the file holds less data than
  // requested: the read goes to the end of the file and returns the number
  // of int16s actually read.
  const size_t kBufferLengthLargerThanFile = kBufferLength * 2;
  buffer.reset(new int16_t[kBufferLengthLargerThanFile]);
  EXPECT_EQ(kBufferLength,
            ReadInt16BufferFromFile(file.get(),
                                    kBufferLengthLargerThanFile,
                                    buffer.get()));
  EXPECT_EQ(11544, buffer[0]);
  EXPECT_EQ(22377, buffer[4]);
  EXPECT_EQ(16389, buffer[7]);
  EXPECT_EQ(17631, buffer[kBufferLength - 1]);
}
|
||||
|
||||
// Reads the double resource file as 16-bit integers converted to float, first
// with a buffer that matches the file size and then with a larger one.
TEST_F(TransientFileUtilsTest, ReadInt16FromFileToFloatBuffer) {
  scoped_ptr<FileWrapper> file(FileWrapper::Create());

  file->OpenFile(kTestFileName.c_str(),
                 true,    // Read only.
                 true,    // Loop.
                 false);  // No text.
  ASSERT_TRUE(file->Open()) << "File could not be opened:\n"
                            << kTestFileName.c_str();

  const size_t kBufferLength = 12;
  scoped_ptr<float[]> buffer(new float[kBufferLength]);

  EXPECT_EQ(kBufferLength, ReadInt16FromFileToFloatBuffer(file.get(),
                                                          kBufferLength,
                                                          buffer.get()));

  // The buffer holds floats, so compare with float precision.
  EXPECT_FLOAT_EQ(11544, buffer[0]);
  EXPECT_FLOAT_EQ(22377, buffer[4]);
  EXPECT_FLOAT_EQ(16389, buffer[7]);
  EXPECT_FLOAT_EQ(17631, buffer[kBufferLength - 1]);

  file->Rewind();

  // The next check covers the case where the file holds less data than
  // requested: the read goes to the end of the file and returns the number
  // of int16s actually read.
  const size_t kBufferLengthLargerThanFile = kBufferLength * 2;
  buffer.reset(new float[kBufferLengthLargerThanFile]);
  EXPECT_EQ(kBufferLength,
            ReadInt16FromFileToFloatBuffer(file.get(),
                                           kBufferLengthLargerThanFile,
                                           buffer.get()));
  EXPECT_FLOAT_EQ(11544, buffer[0]);
  EXPECT_FLOAT_EQ(22377, buffer[4]);
  EXPECT_FLOAT_EQ(16389, buffer[7]);
  EXPECT_FLOAT_EQ(17631, buffer[kBufferLength - 1]);
}
|
||||
|
||||
// Reads the double resource file as 16-bit integers converted to double,
// first with a buffer that matches the file size and then with a larger one.
TEST_F(TransientFileUtilsTest, ReadInt16FromFileToDoubleBuffer) {
  const std::string input_path = kTestFileName;

  scoped_ptr<FileWrapper> file(FileWrapper::Create());

  file->OpenFile(input_path.c_str(),
                 true,    // Read only.
                 true,    // Loop.
                 false);  // No text.
  ASSERT_TRUE(file->Open()) << "File could not be opened:\n"
                            << kTestFileName.c_str();

  const size_t kBufferLength = 12;
  scoped_ptr<double[]> samples(new double[kBufferLength]);

  EXPECT_EQ(kBufferLength, ReadInt16FromFileToDoubleBuffer(file.get(),
                                                           kBufferLength,
                                                           samples.get()));
  EXPECT_DOUBLE_EQ(11544, samples[0]);
  EXPECT_DOUBLE_EQ(22377, samples[4]);
  EXPECT_DOUBLE_EQ(16389, samples[7]);
  EXPECT_DOUBLE_EQ(17631, samples[kBufferLength - 1]);

  file->Rewind();

  // Request more values than the file holds: the read must go to the end of
  // the file and report the number of int16s actually read.
  const size_t kOversizedLength = kBufferLength * 2;
  samples.reset(new double[kOversizedLength]);
  EXPECT_EQ(kBufferLength,
            ReadInt16FromFileToDoubleBuffer(file.get(),
                                            kOversizedLength,
                                            samples.get()));
  EXPECT_DOUBLE_EQ(11544, samples[0]);
  EXPECT_DOUBLE_EQ(22377, samples[4]);
  EXPECT_DOUBLE_EQ(16389, samples[7]);
  EXPECT_DOUBLE_EQ(17631, samples[kBufferLength - 1]);
}
|
||||
|
||||
// Reads the float resource file, first with a buffer that matches the file
// size and then with a larger one.
TEST_F(TransientFileUtilsTest, ReadFloatBufferFromFile) {
  const std::string input_path = kTestFileNamef;

  scoped_ptr<FileWrapper> file(FileWrapper::Create());

  file->OpenFile(input_path.c_str(),
                 true,    // Read only.
                 true,    // Loop.
                 false);  // No text.
  ASSERT_TRUE(file->Open()) << "File could not be opened:\n"
                            << kTestFileNamef.c_str();

  const size_t kBufferLength = 3;
  scoped_ptr<float[]> values(new float[kBufferLength]);

  EXPECT_EQ(kBufferLength, ReadFloatBufferFromFile(file.get(),
                                                   kBufferLength,
                                                   values.get()));
  EXPECT_FLOAT_EQ(kPi, values[0]);
  EXPECT_FLOAT_EQ(kE, values[1]);
  EXPECT_FLOAT_EQ(kAvogadro, values[2]);

  file->Rewind();

  // Request more values than the file holds: the read must go to the end of
  // the file and report the number of floats actually read.
  const size_t kOversizedLength = kBufferLength * 2;
  values.reset(new float[kOversizedLength]);
  EXPECT_EQ(kBufferLength, ReadFloatBufferFromFile(file.get(),
                                                   kOversizedLength,
                                                   values.get()));
  EXPECT_FLOAT_EQ(kPi, values[0]);
  EXPECT_FLOAT_EQ(kE, values[1]);
  EXPECT_FLOAT_EQ(kAvogadro, values[2]);
}
|
||||
|
||||
// Reads the double resource file, first with a buffer that matches the file
// size and then with a larger one.
TEST_F(TransientFileUtilsTest, ReadDoubleBufferFromFile) {
  const std::string input_path = kTestFileName;

  scoped_ptr<FileWrapper> file(FileWrapper::Create());

  file->OpenFile(input_path.c_str(),
                 true,    // Read only.
                 true,    // Loop.
                 false);  // No text.
  ASSERT_TRUE(file->Open()) << "File could not be opened:\n"
                            << kTestFileName.c_str();

  const size_t kBufferLength = 3;
  scoped_ptr<double[]> values(new double[kBufferLength]);

  EXPECT_EQ(kBufferLength, ReadDoubleBufferFromFile(file.get(),
                                                    kBufferLength,
                                                    values.get()));
  EXPECT_DOUBLE_EQ(kPi, values[0]);
  EXPECT_DOUBLE_EQ(kE, values[1]);
  EXPECT_DOUBLE_EQ(kAvogadro, values[2]);

  file->Rewind();

  // Request more values than the file holds: the read must go to the end of
  // the file and report the number of doubles actually read.
  const size_t kOversizedLength = kBufferLength * 2;
  values.reset(new double[kOversizedLength]);
  EXPECT_EQ(kBufferLength, ReadDoubleBufferFromFile(file.get(),
                                                    kOversizedLength,
                                                    values.get()));
  EXPECT_DOUBLE_EQ(kPi, values[0]);
  EXPECT_DOUBLE_EQ(kE, values[1]);
  EXPECT_DOUBLE_EQ(kAvogadro, values[2]);
}
|
||||
|
||||
// Round trip: writes three int16 values to a file, reads them back and
// compares the two buffers byte by byte.
TEST_F(TransientFileUtilsTest, WriteInt16BufferToFile) {
  const std::string output_path = test::OutputPath() + "utils_test.out";

  scoped_ptr<FileWrapper> file(FileWrapper::Create());

  file->OpenFile(output_path.c_str(),
                 false,   // Write mode.
                 false,   // No loop.
                 false);  // No text.
  ASSERT_TRUE(file->Open()) << "File could not be opened:\n"
                            << output_path.c_str();

  const size_t kBufferLength = 3;
  scoped_ptr<int16_t[]> expected(new int16_t[kBufferLength]);
  scoped_ptr<int16_t[]> actual(new int16_t[kBufferLength]);

  expected[0] = 1;
  expected[1] = 2;
  expected[2] = 3;

  EXPECT_EQ(kBufferLength, WriteInt16BufferToFile(file.get(),
                                                  kBufferLength,
                                                  expected.get()));

  file->CloseFile();

  // Reopen the same file for reading.
  file->OpenFile(output_path.c_str(),
                 true,    // Read only.
                 false,   // No loop.
                 false);  // No text.
  ASSERT_TRUE(file->Open()) << "File could not be opened:\n"
                            << output_path.c_str();

  EXPECT_EQ(kBufferLength, ReadInt16BufferFromFile(file.get(),
                                                   kBufferLength,
                                                   actual.get()));
  EXPECT_EQ(0, memcmp(expected.get(),
                      actual.get(),
                      kBufferLength * sizeof(expected[0])));
}
|
||||
|
||||
// Writes three float values to an output file, reads them back and checks
// that the bytes read match the bytes written.
TEST_F(TransientFileUtilsTest, WriteFloatBufferToFile) {
  scoped_ptr<FileWrapper> io_file(FileWrapper::Create());
  const std::string out_path = test::OutputPath() + "utils_test.out";

  io_file->OpenFile(out_path.c_str(),
                    false,   // Write mode.
                    false,   // No loop.
                    false);  // No text.
  ASSERT_TRUE(io_file->Open()) << "File could not be opened:\n"
                               << out_path.c_str();

  const size_t kNumValues = 3;
  scoped_ptr<float[]> expected(new float[kNumValues]);
  scoped_ptr<float[]> actual(new float[kNumValues]);

  expected[0] = kPi;
  expected[1] = kE;
  expected[2] = kAvogadro;

  EXPECT_EQ(kNumValues,
            WriteFloatBufferToFile(io_file.get(), kNumValues, expected.get()));

  // Reopen the same path for reading to verify what ended up on disk.
  io_file->CloseFile();
  io_file->OpenFile(out_path.c_str(),
                    true,    // Read only.
                    false,   // No loop.
                    false);  // No text.
  ASSERT_TRUE(io_file->Open()) << "File could not be opened:\n"
                               << out_path.c_str();

  EXPECT_EQ(kNumValues,
            ReadFloatBufferFromFile(io_file.get(), kNumValues, actual.get()));
  EXPECT_EQ(0, memcmp(expected.get(),
                      actual.get(),
                      kNumValues * sizeof(expected[0])));
}
|
||||
|
||||
// Writes three double values to an output file, reads them back and checks
// that the bytes read match the bytes written.
TEST_F(TransientFileUtilsTest, WriteDoubleBufferToFile) {
  scoped_ptr<FileWrapper> io_file(FileWrapper::Create());
  const std::string out_path = test::OutputPath() + "utils_test.out";

  io_file->OpenFile(out_path.c_str(),
                    false,   // Write mode.
                    false,   // No loop.
                    false);  // No text.
  ASSERT_TRUE(io_file->Open()) << "File could not be opened:\n"
                               << out_path.c_str();

  const size_t kNumValues = 3;
  scoped_ptr<double[]> expected(new double[kNumValues]);
  scoped_ptr<double[]> actual(new double[kNumValues]);

  expected[0] = kPi;
  expected[1] = kE;
  expected[2] = kAvogadro;

  EXPECT_EQ(kNumValues,
            WriteDoubleBufferToFile(io_file.get(), kNumValues, expected.get()));

  // Reopen the same path for reading to verify what ended up on disk.
  io_file->CloseFile();
  io_file->OpenFile(out_path.c_str(),
                    true,    // Read only.
                    false,   // No loop.
                    false);  // No text.
  ASSERT_TRUE(io_file->Open()) << "File could not be opened:\n"
                               << out_path.c_str();

  EXPECT_EQ(kNumValues,
            ReadDoubleBufferFromFile(io_file.get(), kNumValues, actual.get()));
  EXPECT_EQ(0, memcmp(expected.get(),
                      actual.get(),
                      kNumValues * sizeof(expected[0])));
}
|
||||
|
||||
// Checks the error return values of the file utilities: -1 from the byte
// array conversions and 0 from the buffer read/write helpers when given NULL
// arguments, zero lengths or a file that has not been opened.
TEST_F(TransientFileUtilsTest, ExpectedErrorReturnValues) {
  const std::string input_path = kTestFileName;

  double converted;
  scoped_ptr<int16_t[]> samples(new int16_t[1]);
  scoped_ptr<double[]> doubles(new double[1]);
  scoped_ptr<FileWrapper> file(FileWrapper::Create());

  // Conversions with a NULL input or output.
  EXPECT_EQ(-1, ConvertByteArrayToDouble(NULL, &converted));
  EXPECT_EQ(-1, ConvertByteArrayToDouble(kPiBytes, NULL));

  EXPECT_EQ(-1, ConvertDoubleToByteArray(kPi, NULL));

  // Tests with file not opened.
  EXPECT_EQ(0u, ReadInt16BufferFromFile(file.get(), 1, samples.get()));
  EXPECT_EQ(0u,
            ReadInt16FromFileToDoubleBuffer(file.get(), 1, doubles.get()));
  EXPECT_EQ(0u, ReadDoubleBufferFromFile(file.get(), 1, doubles.get()));
  EXPECT_EQ(0u, WriteInt16BufferToFile(file.get(), 1, samples.get()));
  EXPECT_EQ(0u, WriteDoubleBufferToFile(file.get(), 1, doubles.get()));

  file->OpenFile(input_path.c_str(),
                 true,    // Read only.
                 true,    // Loop.
                 false);  // No text.
  ASSERT_TRUE(file->Open()) << "File could not be opened:\n"
                            << kTestFileName.c_str();

  // With the file open, each helper is called with a NULL file, a NULL
  // buffer and a zero length in turn.
  EXPECT_EQ(0u, ReadInt16BufferFromFile(NULL, 1, samples.get()));
  EXPECT_EQ(0u, ReadInt16BufferFromFile(file.get(), 1, NULL));
  EXPECT_EQ(0u, ReadInt16BufferFromFile(file.get(), 0, samples.get()));

  EXPECT_EQ(0u, ReadInt16FromFileToDoubleBuffer(NULL, 1, doubles.get()));
  EXPECT_EQ(0u, ReadInt16FromFileToDoubleBuffer(file.get(), 1, NULL));
  EXPECT_EQ(0u,
            ReadInt16FromFileToDoubleBuffer(file.get(), 0, doubles.get()));

  EXPECT_EQ(0u, ReadDoubleBufferFromFile(NULL, 1, doubles.get()));
  EXPECT_EQ(0u, ReadDoubleBufferFromFile(file.get(), 1, NULL));
  EXPECT_EQ(0u, ReadDoubleBufferFromFile(file.get(), 0, doubles.get()));

  EXPECT_EQ(0u, WriteInt16BufferToFile(NULL, 1, samples.get()));
  EXPECT_EQ(0u, WriteInt16BufferToFile(file.get(), 1, NULL));
  EXPECT_EQ(0u, WriteInt16BufferToFile(file.get(), 0, samples.get()));

  EXPECT_EQ(0u, WriteDoubleBufferToFile(NULL, 1, doubles.get()));
  EXPECT_EQ(0u, WriteDoubleBufferToFile(file.get(), 1, NULL));
  EXPECT_EQ(0u, WriteDoubleBufferToFile(file.get(), 0, doubles.get()));
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
49
webrtc/modules/audio_processing/transient/moving_moments.cc
Normal file
49
webrtc/modules/audio_processing/transient/moving_moments.cc
Normal file
@ -0,0 +1,49 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/transient/moving_moments.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Creates a calculator over a window of |length| samples. The window is
// pre-filled with zeros, so both running sums start at zero.
// |length| must be greater than zero.
MovingMoments::MovingMoments(size_t length)
    : length_(length),
      queue_(),
      sum_(0.f),
      sum_of_squares_(0.f) {
  assert(length > 0);
  for (size_t filled = 0; filled != length; ++filled) {
    queue_.push(0.f);
  }
}
|
||||
|
||||
MovingMoments::~MovingMoments() {}
|
||||
|
||||
void MovingMoments::CalculateMoments(const float* in, size_t in_length,
|
||||
float* first, float* second) {
|
||||
assert(in && in_length > 0 && first && second);
|
||||
|
||||
for (size_t i = 0; i < in_length; ++i) {
|
||||
const float old_value = queue_.front();
|
||||
queue_.pop();
|
||||
queue_.push(in[i]);
|
||||
|
||||
sum_ += in[i] - old_value;
|
||||
sum_of_squares_ += in[i] * in[i] - old_value * old_value;
|
||||
first[i] = sum_ / length_;
|
||||
second[i] = sum_of_squares_ / length_;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
52
webrtc/modules/audio_processing/transient/moving_moments.h
Normal file
52
webrtc/modules/audio_processing/transient/moving_moments.h
Normal file
@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_MOVING_MOMENTS_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_MOVING_MOMENTS_H_
|
||||
|
||||
#include <queue>
|
||||
|
||||
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Computes, for every sample of an input buffer, the first and second moments
// (mean and mean of squares) over a sliding window made of that sample and a
// fixed number of preceding samples.
// The window contents are kept between calls, so a signal can be processed
// in consecutive chunks.
// TODO(chadan): Implement a function that takes a buffer of first moments and
// a buffer of second moments, and calculates the variances. When needed.
// TODO(chadan): Add functionality to update with a buffer while only
// outputting the last values of the moments. When needed.
class MovingMoments {
 public:
  // Creates a MovingMoments object whose window holds the last |length|
  // values (including the new value introduced in every new calculation).
  explicit MovingMoments(size_t length);
  ~MovingMoments();

  // Processes the |in_length| values in |in| and writes one result per input
  // value into the output buffers. |first| and |second| must each have room
  // for at least |in_length| values.
  void CalculateMoments(const float* in, size_t in_length,
                        float* first, float* second);

 private:
  // Number of values in the window.
  size_t length_;
  // A queue holding the |length_| latest input values.
  std::queue<float> queue_;
  // Sum of the values of the queue.
  float sum_;
  // Sum of the squares of the values of the queue.
  float sum_of_squares_;
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_MOVING_MOMENTS_H_
|
@ -0,0 +1,206 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/transient/moving_moments.h"
|
||||
|
||||
#include "testing/gtest/include/gtest/gtest.h"
|
||||
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
static const float kTolerance = 0.0001f;
|
||||
|
||||
class MovingMomentsTest : public ::testing::Test {
|
||||
protected:
|
||||
static const size_t kMovingMomentsBufferLength = 5;
|
||||
static const size_t kMaxOutputLength = 20; // Valid for this tests only.
|
||||
|
||||
virtual void SetUp();
|
||||
// Calls CalculateMoments and verifies that it produces the expected
|
||||
// outputs.
|
||||
void CalculateMomentsAndVerify(const float* input, size_t input_length,
|
||||
const float* expected_mean,
|
||||
const float* expected_mean_squares);
|
||||
|
||||
scoped_ptr<MovingMoments> moving_moments_;
|
||||
float output_mean_[kMaxOutputLength];
|
||||
float output_mean_squares_[kMaxOutputLength];
|
||||
};
|
||||
|
||||
const size_t MovingMomentsTest::kMaxOutputLength;
|
||||
|
||||
void MovingMomentsTest::SetUp() {
|
||||
moving_moments_.reset(new MovingMoments(kMovingMomentsBufferLength));
|
||||
}
|
||||
|
||||
void MovingMomentsTest::CalculateMomentsAndVerify(
|
||||
const float* input, size_t input_length,
|
||||
const float* expected_mean,
|
||||
const float* expected_mean_squares) {
|
||||
ASSERT_LE(input_length, kMaxOutputLength);
|
||||
|
||||
moving_moments_->CalculateMoments(input,
|
||||
input_length,
|
||||
output_mean_,
|
||||
output_mean_squares_);
|
||||
|
||||
for (size_t i = 1; i < input_length; ++i) {
|
||||
EXPECT_NEAR(expected_mean[i], output_mean_[i], kTolerance);
|
||||
EXPECT_NEAR(expected_mean_squares[i], output_mean_squares_[i], kTolerance);
|
||||
}
|
||||
}
|
||||
|
||||
// An all-zeros input must produce all-zeros moments.
TEST_F(MovingMomentsTest, CorrectMomentsOfAnAllZerosBuffer) {
  const float kInput[] = {0.f, 0.f, 0.f, 0.f, 0.f};
  const size_t kInputLength = sizeof(kInput) / sizeof(*kInput);

  const float kExpectedMean[kInputLength] = {0.f, 0.f, 0.f, 0.f, 0.f};
  const float kExpectedMeanSquares[kInputLength] = {0.f, 0.f, 0.f, 0.f, 0.f};

  CalculateMomentsAndVerify(kInput, kInputLength,
                            kExpectedMean, kExpectedMeanSquares);
}
|
||||
|
||||
// A constant input ramps the moments up while the zero-filled window is
// being replaced, and then holds them at the constant and its square.
TEST_F(MovingMomentsTest, CorrectMomentsOfAConstantBuffer) {
  const float kInput[] = {5.f, 5.f, 5.f, 5.f, 5.f, 5.f, 5.f, 5.f, 5.f, 5.f};
  const size_t kInputLength = sizeof(kInput) / sizeof(*kInput);

  const float kExpectedMean[kInputLength] =
      {1.f, 2.f, 3.f, 4.f, 5.f, 5.f, 5.f, 5.f, 5.f, 5.f};
  const float kExpectedMeanSquares[kInputLength] =
      {5.f, 10.f, 15.f, 20.f, 25.f, 25.f, 25.f, 25.f, 25.f, 25.f};

  CalculateMomentsAndVerify(kInput, kInputLength,
                            kExpectedMean, kExpectedMeanSquares);
}
|
||||
|
||||
// Checks the moments of a strictly increasing positive ramp.
TEST_F(MovingMomentsTest, CorrectMomentsOfAnIncreasingBuffer) {
  const float kInput[] = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f};
  const size_t kInputLength = sizeof(kInput) / sizeof(*kInput);

  const float kExpectedMean[kInputLength] =
      {0.2f, 0.6f, 1.2f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f};
  const float kExpectedMeanSquares[kInputLength] =
      {0.2f, 1.f, 2.8f, 6.f, 11.f, 18.f, 27.f, 38.f, 51.f};

  CalculateMomentsAndVerify(kInput, kInputLength,
                            kExpectedMean, kExpectedMeanSquares);
}
|
||||
|
||||
// Checks the moments of a strictly decreasing negative ramp: the means are
// negative while the means of squares are positive.
TEST_F(MovingMomentsTest, CorrectMomentsOfADecreasingBuffer) {
  const float kInput[] =
      {-1.f, -2.f, -3.f, -4.f, -5.f, -6.f, -7.f, -8.f, -9.f};
  const size_t kInputLength = sizeof(kInput) / sizeof(*kInput);

  const float kExpectedMean[kInputLength] =
      {-0.2f, -0.6f, -1.2f, -2.f, -3.f, -4.f, -5.f, -6.f, -7.f};
  const float kExpectedMeanSquares[kInputLength] =
      {0.2f, 1.f, 2.8f, 6.f, 11.f, 18.f, 27.f, 38.f, 51.f};

  CalculateMomentsAndVerify(kInput, kInputLength,
                            kExpectedMean, kExpectedMeanSquares);
}
|
||||
|
||||
// Checks an alternating +1/-1 input with a window of four values, replacing
// the fixture's default MovingMoments instance.
TEST_F(MovingMomentsTest, CorrectMomentsOfAZeroMeanSequence) {
  const size_t kWindowLength = 4;
  moving_moments_.reset(new MovingMoments(kWindowLength));

  const float kInput[] =
      {1.f, -1.f, 1.f, -1.f, 1.f, -1.f, 1.f, -1.f, 1.f, -1.f};
  const size_t kInputLength = sizeof(kInput) / sizeof(*kInput);

  const float kExpectedMean[kInputLength] =
      {0.25f, 0.f, 0.25f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f};
  const float kExpectedMeanSquares[kInputLength] =
      {0.25f, 0.5f, 0.75f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f};

  CalculateMomentsAndVerify(kInput, kInputLength,
                            kExpectedMean, kExpectedMeanSquares);
}
|
||||
|
||||
// Checks the moments of an input with no regular pattern.
TEST_F(MovingMomentsTest, CorrectMomentsOfAnArbitraryBuffer) {
  const float kInput[] =
      {0.2f, 0.3f, 0.5f, 0.7f, 0.11f, 0.13f, 0.17f, 0.19f, 0.23f};
  const size_t kInputLength = sizeof(kInput) / sizeof(*kInput);

  const float kExpectedMean[kInputLength] =
      {0.04f, 0.1f, 0.2f, 0.34f, 0.362f, 0.348f, 0.322f, 0.26f, 0.166f};
  const float kExpectedMeanSquares[kInputLength] =
      {0.008f, 0.026f, 0.076f, 0.174f, 0.1764f, 0.1718f, 0.1596f, 0.1168f,
       0.0294f};

  CalculateMomentsAndVerify(kInput, kInputLength,
                            kExpectedMean, kExpectedMeanSquares);
}
|
||||
|
||||
// Feeds three consecutive buffers to the same MovingMoments instance and
// checks that the window contents carry over from one call to the next.
TEST_F(MovingMomentsTest, MutipleCalculateMomentsCalls) {
  // First call.
  const float kFirstInput[] =
      {0.2f, 0.3f, 0.5f, 0.7f, 0.11f, 0.13f, 0.17f, 0.19f, 0.23f};
  const size_t kFirstLength = sizeof(kFirstInput) / sizeof(*kFirstInput);
  // Second call.
  const float kSecondInput[] = {0.29f, 0.31f};
  const size_t kSecondLength = sizeof(kSecondInput) / sizeof(*kSecondInput);
  // Third call.
  const float kThirdInput[] = {0.37f, 0.41f, 0.43f, 0.47f};
  const size_t kThirdLength = sizeof(kThirdInput) / sizeof(*kThirdInput);

  const float kFirstExpectedMean[kFirstLength] =
      {0.04f, 0.1f, 0.2f, 0.34f, 0.362f, 0.348f, 0.322f, 0.26f, 0.166f};
  const float kFirstExpectedMeanSquares[kFirstLength] =
      {0.008f, 0.026f, 0.076f, 0.174f, 0.1764f, 0.1718f, 0.1596f, 0.1168f,
       0.0294f};

  const float kSecondExpectedMean[kSecondLength] = {0.202f, 0.238f};
  const float kSecondExpectedMeanSquares[kSecondLength] = {0.0438f, 0.0596f};

  const float kThirdExpectedMean[kThirdLength] =
      {0.278f, 0.322f, 0.362f, 0.398f};
  const float kThirdExpectedMeanSquares[kThirdLength] =
      {0.0812f, 0.1076f, 0.134f, 0.1614f};

  CalculateMomentsAndVerify(kFirstInput, kFirstLength,
                            kFirstExpectedMean, kFirstExpectedMeanSquares);

  CalculateMomentsAndVerify(kSecondInput, kSecondLength,
                            kSecondExpectedMean, kSecondExpectedMeanSquares);

  CalculateMomentsAndVerify(kThirdInput, kThirdLength,
                            kThirdExpectedMean, kThirdExpectedMeanSquares);
}
|
||||
|
||||
// Processes the same input once as a single block and once sample by sample
// on a fresh instance, and checks that both give the same moments.
TEST_F(MovingMomentsTest,
       VerifySampleBasedVsBlockBasedCalculation) {
  const float kInput[] =
      {0.2f, 0.3f, 0.5f, 0.7f, 0.11f, 0.13f, 0.17f, 0.19f, 0.23f};
  const size_t kInputLength = sizeof(kInput) / sizeof(*kInput);

  // Block-based pass: the whole input in one call.
  float block_mean[kInputLength];
  float block_mean_squares[kInputLength];
  moving_moments_->CalculateMoments(kInput, kInputLength,
                                    block_mean, block_mean_squares);

  // Sample-based pass on a new instance, one value per call.
  moving_moments_.reset(new MovingMoments(kMovingMomentsBufferLength));
  for (size_t n = 0; n < kInputLength; ++n) {
    float sample_mean;
    float sample_mean_squares;
    moving_moments_->CalculateMoments(&kInput[n], 1,
                                      &sample_mean, &sample_mean_squares);
    EXPECT_FLOAT_EQ(block_mean[n], sample_mean);
    EXPECT_FLOAT_EQ(block_mean_squares[n], sample_mean_squares);
  }
}
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,12 @@
|
||||
function [] = plotDetection(PCMfile, DATfile, fs, chunkSize)
%[] = plotDetection(PCMfile, DATfile, fs, chunkSize)
%
%Plots the input signal together with the detection values, each against
%its own time vector, in a single plot.
%
%PCMfile: The file of the input signal in PCM format.
%DATfile: The file containing the detection values in binary float format.
%fs: The sample rate of the signal in Hertz.
%chunkSize: The chunk size used to compute the detection values in seconds.
[signal, signalTime] = readPCM(PCMfile, fs);
[detection, detectionTime] = readDetection(DATfile, fs, chunkSize);
plot(signalTime, signal, detectionTime, detection);
|
@ -0,0 +1,16 @@
|
||||
function [d, t] = readDetection(file, fs, chunkSize)
%[d, t] = readDetection(file, fs, chunkSize)
%
%Reads a detection signal from a DAT file and expands it so that there is
%one detection value per signal sample.
%
%d: The detection signal.
%t: The respective time vector.
%
%file: The DAT file where the detection signal is stored in float format.
%fs: The signal sample rate in Hertz.
%chunkSize: The chunk size used for the detection in seconds.
%
%Raises an error if the file cannot be opened.
fid = fopen(file);
if fid == -1
  % fopen signals failure with -1; fail here with a clear message instead
  % of letting fread complain about an invalid file identifier.
  error('readDetection:fileOpen', 'Could not open file: %s', file);
end
d = fread(fid, inf, 'float');
fclose(fid);
% One time stamp per signal sample, covering all the chunks read.
t = 0:(1 / fs):(length(d) * chunkSize - 1 / fs);
% Repeat each chunk value for every sample that falls inside that chunk.
d = d(floor(t / chunkSize) + 1);
|
16
webrtc/modules/audio_processing/transient/test/readPCM.m
Normal file
16
webrtc/modules/audio_processing/transient/test/readPCM.m
Normal file
@ -0,0 +1,16 @@
|
||||
function [x, t] = readPCM(file, fs)
%[x, t] = readPCM(file, fs)
%
%Reads a signal from a PCM file, removes its mean and scales it so that
%its largest absolute value is 1.
%
%x: The read signal after normalization.
%t: The respective time vector.
%
%file: The PCM file where the signal is stored in int16 format.
%fs: The signal sample rate in Hertz.
%
%Raises an error if the file cannot be opened.
fid = fopen(file);
if fid == -1
  % fopen signals failure with -1; fail here with a clear message instead
  % of letting fread complain about an invalid file identifier.
  error('readPCM:fileOpen', 'Could not open file: %s', file);
end
x = fread(fid, inf, 'int16');
fclose(fid);
x = x - mean(x);
% A constant signal has zero peak after mean removal; skip the scaling in
% that case so the result stays all zeros instead of becoming NaN.
peak = max(abs(x));
if peak > 0
  x = x / peak;
end
t = 0:(1 / fs):((length(x) - 1) / fs);
|
173
webrtc/modules/audio_processing/transient/transient_detector.cc
Normal file
173
webrtc/modules/audio_processing/transient/transient_detector.cc
Normal file
@ -0,0 +1,173 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/transient/transient_detector.h"

#include <assert.h>
#include <float.h>
#include <math.h>
#include <string.h>

#include <algorithm>

#include "webrtc/modules/audio_processing/transient/common.h"
#include "webrtc/modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h"
#include "webrtc/modules/audio_processing/transient/moving_moments.h"
#include "webrtc/modules/audio_processing/transient/wpd_tree.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
static const int kTransientLengthMs = 30;
|
||||
static const int kChunksAtStartupLeftToDelete =
|
||||
kTransientLengthMs / ts::kChunkSizeMs;
|
||||
static const float kDetectThreshold = 16.f;
|
||||
|
||||
// Sets up the detector for |sample_rate_hz|, which must be one of the 8, 16,
// 32 or 48 kHz constants in ts::. Builds the WPD tree, one MovingMoments per
// tree leaf, the scratch buffers for the moments, and a history of previous
// results initialized to zero.
TransientDetector::TransientDetector(int sample_rate_hz)
    : samples_per_chunk_(sample_rate_hz * ts::kChunkSizeMs / 1000),
      last_first_moment_(),
      last_second_moment_(),
      chunks_at_startup_left_to_delete_(kChunksAtStartupLeftToDelete),
      reference_energy_(1.f),
      using_reference_(false) {
  assert(sample_rate_hz == ts::kSampleRate8kHz ||
         sample_rate_hz == ts::kSampleRate16kHz ||
         sample_rate_hz == ts::kSampleRate32kHz ||
         sample_rate_hz == ts::kSampleRate48kHz);

  // Round both lengths down to a multiple of |kLeaves| so that no data is
  // lost while downsampling.
  samples_per_chunk_ -= samples_per_chunk_ % kLeaves;
  int samples_per_transient = sample_rate_hz * kTransientLengthMs / 1000;
  samples_per_transient -= samples_per_transient % kLeaves;

  tree_leaves_data_length_ = samples_per_chunk_ / kLeaves;
  wpd_tree_.reset(new WPDTree(samples_per_chunk_,
                              kDaubechies8HighPassCoefficients,
                              kDaubechies8LowPassCoefficients,
                              kDaubechies8CoefficientsLength,
                              kLevels));

  // Every leaf gets its own moving window of the same length.
  const size_t moments_window_length = samples_per_transient / kLeaves;
  for (size_t leaf = 0; leaf < kLeaves; ++leaf) {
    moving_moments_[leaf].reset(new MovingMoments(moments_window_length));
  }

  first_moments_.reset(new float[tree_leaves_data_length_]);
  second_moments_.reset(new float[tree_leaves_data_length_]);

  // Start the result history with zeros.
  previous_results_.assign(kChunksAtStartupLeftToDelete, 0.f);
}
|
||||
|
||||
TransientDetector::~TransientDetector() {}
|
||||
|
||||
float TransientDetector::Detect(const float* data,
|
||||
size_t data_length,
|
||||
const float* reference_data,
|
||||
size_t reference_length) {
|
||||
assert(data && data_length == samples_per_chunk_);
|
||||
|
||||
// TODO(aluebs): Check if these errors can logically happen and if not assert
|
||||
// on them.
|
||||
if (wpd_tree_->Update(data, samples_per_chunk_) != 0) {
|
||||
return -1.f;
|
||||
}
|
||||
|
||||
float result = 0.f;
|
||||
|
||||
for (size_t i = 0; i < kLeaves; ++i) {
|
||||
WPDNode* leaf = wpd_tree_->NodeAt(kLevels, i);
|
||||
|
||||
moving_moments_[i]->CalculateMoments(leaf->data(),
|
||||
tree_leaves_data_length_,
|
||||
first_moments_.get(),
|
||||
second_moments_.get());
|
||||
|
||||
// Add value delayed (Use the last moments from the last call to Detect).
|
||||
float unbiased_data = leaf->data()[0] - last_first_moment_[i];
|
||||
result +=
|
||||
unbiased_data * unbiased_data / (last_second_moment_[i] + FLT_MIN);
|
||||
|
||||
// Add new values.
|
||||
for (size_t j = 1; j < tree_leaves_data_length_; ++j) {
|
||||
unbiased_data = leaf->data()[j] - first_moments_[j - 1];
|
||||
result +=
|
||||
unbiased_data * unbiased_data / (second_moments_[j - 1] + FLT_MIN);
|
||||
}
|
||||
|
||||
last_first_moment_[i] = first_moments_[tree_leaves_data_length_ - 1];
|
||||
last_second_moment_[i] = second_moments_[tree_leaves_data_length_ - 1];
|
||||
}
|
||||
|
||||
result /= tree_leaves_data_length_;
|
||||
|
||||
result *= ReferenceDetectionValue(reference_data, reference_length);
|
||||
|
||||
if (chunks_at_startup_left_to_delete_ > 0) {
|
||||
chunks_at_startup_left_to_delete_--;
|
||||
result = 0.f;
|
||||
}
|
||||
|
||||
if (result >= kDetectThreshold) {
|
||||
result = 1.f;
|
||||
} else {
|
||||
// Get proportional value.
|
||||
// Proportion achieved with a squared raised cosine function with domain
|
||||
// [0, kDetectThreshold) and image [0, 1), it's always increasing.
|
||||
const float horizontal_scaling = ts::kPi / kDetectThreshold;
|
||||
const float kHorizontalShift = ts::kPi;
|
||||
const float kVerticalScaling = 0.5f;
|
||||
const float kVerticalShift = 1.f;
|
||||
|
||||
result = (cos(result * horizontal_scaling + kHorizontalShift)
|
||||
+ kVerticalShift) * kVerticalScaling;
|
||||
result *= result;
|
||||
}
|
||||
|
||||
previous_results_.pop_front();
|
||||
previous_results_.push_back(result);
|
||||
|
||||
// In the current implementation we return the max of the current result and
|
||||
// the previous results, so the high results have a width equals to
|
||||
// |transient_length|.
|
||||
return *std::max_element(previous_results_.begin(), previous_results_.end());
|
||||
}
|
||||
|
||||
// Looks for the highest slope and compares it with the previous ones.
|
||||
// An exponential transformation takes this to the [0, 1] range. This value is
|
||||
// multiplied by the detection result to avoid false positives.
|
||||
float TransientDetector::ReferenceDetectionValue(const float* data,
|
||||
size_t length) {
|
||||
if (data == NULL) {
|
||||
using_reference_ = false;
|
||||
return 1.f;
|
||||
}
|
||||
static const float kEnergyRatioThreshold = 0.2f;
|
||||
static const float kReferenceNonLinearity = 20.f;
|
||||
static const float kMemory = 0.99f;
|
||||
float reference_energy = 0.f;
|
||||
for (size_t i = 1; i < length; ++i) {
|
||||
reference_energy += data[i] * data[i];
|
||||
}
|
||||
if (reference_energy == 0.f) {
|
||||
using_reference_ = false;
|
||||
return 1.f;
|
||||
}
|
||||
assert(reference_energy_ != 0);
|
||||
float result = 1.f / (1.f + exp(kReferenceNonLinearity *
|
||||
(kEnergyRatioThreshold -
|
||||
reference_energy / reference_energy_)));
|
||||
reference_energy_ =
|
||||
kMemory * reference_energy_ + (1.f - kMemory) * reference_energy;
|
||||
|
||||
using_reference_ = true;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,87 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_DETECTOR_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_DETECTOR_H_
|
||||
|
||||
#include <deque>
|
||||
|
||||
#include "webrtc/modules/audio_processing/transient/moving_moments.h"
|
||||
#include "webrtc/modules/audio_processing/transient/wpd_tree.h"
|
||||
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// This is an implementation of the transient detector described in "Causal
|
||||
// Wavelet based transient detector".
|
||||
// Calculates the log-likelihood of a transient to happen on a signal at any
|
||||
// given time based on the previous samples; it uses a WPD tree to analyze the
|
||||
// signal. It preserves its state, so it can be multiple-called.
|
||||
class TransientDetector {
|
||||
public:
|
||||
// TODO(chadan): The only supported wavelet is Daubechies 8 using a WPD tree
|
||||
// of 3 levels. Make an overloaded constructor to allow different wavelets and
|
||||
// depths of the tree. When needed.
|
||||
|
||||
// Creates a wavelet based transient detector.
|
||||
TransientDetector(int sample_rate_hz);
|
||||
|
||||
~TransientDetector();
|
||||
|
||||
// Calculates the log-likelihood of the existence of a transient in |data|.
|
||||
// |data_length| has to be equal to |samples_per_chunk_|.
|
||||
// Returns a value between 0 and 1, as a non linear representation of this
|
||||
// likelihood.
|
||||
// Returns a negative value on error.
|
||||
float Detect(const float* data,
|
||||
size_t data_length,
|
||||
const float* reference_data,
|
||||
size_t reference_length);
|
||||
|
||||
bool using_reference() { return using_reference_; }
|
||||
|
||||
private:
|
||||
float ReferenceDetectionValue(const float* data, size_t length);
|
||||
|
||||
static const size_t kLevels = 3;
|
||||
static const size_t kLeaves = 1 << kLevels;
|
||||
|
||||
size_t samples_per_chunk_;
|
||||
|
||||
scoped_ptr<WPDTree> wpd_tree_;
|
||||
size_t tree_leaves_data_length_;
|
||||
|
||||
// A MovingMoments object is needed for each leaf in the WPD tree.
|
||||
scoped_ptr<MovingMoments> moving_moments_[kLeaves];
|
||||
|
||||
scoped_ptr<float[]> first_moments_;
|
||||
scoped_ptr<float[]> second_moments_;
|
||||
|
||||
// Stores the last calculated moments from the previous detection.
|
||||
float last_first_moment_[kLeaves];
|
||||
float last_second_moment_[kLeaves];
|
||||
|
||||
// We keep track of the previous results from the previous chunks, so it can
|
||||
// be used to effectively give results according to the |transient_length|.
|
||||
std::deque<float> previous_results_;
|
||||
|
||||
// Number of chunks that are going to return only zeros at the beginning of
|
||||
// the detection. It helps to avoid infs and nans due to the lack of
|
||||
// information.
|
||||
int chunks_at_startup_left_to_delete_;
|
||||
|
||||
float reference_energy_;
|
||||
|
||||
bool using_reference_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_DETECTOR_H_
|
@ -0,0 +1,104 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/transient/transient_detector.h"
|
||||
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
#include "testing/gtest/include/gtest/gtest.h"
|
||||
#include "webrtc/modules/audio_processing/transient/common.h"
|
||||
#include "webrtc/modules/audio_processing/transient/file_utils.h"
|
||||
#include "webrtc/system_wrappers/interface/file_wrapper.h"
|
||||
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
|
||||
#include "webrtc/test/testsupport/fileutils.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Sample rates, in Hz, that the correctness test below iterates over.
static const int kSampleRatesHz[] = {
    ts::kSampleRate8kHz,
    ts::kSampleRate16kHz,
    ts::kSampleRate32kHz,
    ts::kSampleRate48kHz,
};
// Number of entries in |kSampleRatesHz|.
static const size_t kNumberOfSampleRates =
    sizeof(kSampleRatesHz) / sizeof(kSampleRatesHz[0]);
|
||||
|
||||
// This test is for the correctness of the transient detector.
|
||||
// Checks the results comparing them with the ones stored in the detect files in
|
||||
// the directory: resources/audio_processing/transient/
|
||||
// The files contain all the results in double precision (Little endian).
|
||||
// The audio files used with different sample rates are stored in the same
|
||||
// directory.
|
||||
TEST(TransientDetectorTest, CorrectnessBasedOnFiles) {
  for (size_t i = 0; i < kNumberOfSampleRates; ++i) {
    int sample_rate_hz = kSampleRatesHz[i];

    // Prepare detect file.
    std::stringstream detect_file_name;
    detect_file_name << "audio_processing/transient/detect"
                     << (sample_rate_hz / 1000) << "kHz";

    scoped_ptr<FileWrapper> detect_file(FileWrapper::Create());

    detect_file->OpenFile(
        test::ResourcePath(detect_file_name.str(), "dat").c_str(),
        true,    // Read only.
        false,   // No loop.
        false);  // No text.

    bool file_opened = detect_file->Open();
    ASSERT_TRUE(file_opened) << "File could not be opened.\n"
                             << detect_file_name.str().c_str();

    // Prepare audio file.
    std::stringstream audio_file_name;
    audio_file_name << "audio_processing/transient/audio"
                    << (sample_rate_hz / 1000) << "kHz";

    scoped_ptr<FileWrapper> audio_file(FileWrapper::Create());

    audio_file->OpenFile(
        test::ResourcePath(audio_file_name.str(), "pcm").c_str(),
        true,    // Read only.
        false,   // No loop.
        false);  // No text.

    // The audio file drives the comparison loop below, so it must be checked
    // just like the detect file. NOTE(review): presumably reading from an
    // unopened file yields no full frame, in which case the loop would not
    // run and nothing would be compared -- confirm against the reader.
    ASSERT_TRUE(audio_file->Open()) << "File could not be opened.\n"
                                    << audio_file_name.str().c_str();

    // Create detector.
    TransientDetector detector(sample_rate_hz);

    // One chunk of audio at the current sample rate.
    const size_t buffer_length = sample_rate_hz * ts::kChunkSizeMs / 1000;
    scoped_ptr<float[]> buffer(new float[buffer_length]);

    const float kTolerance = 0.01f;

    size_t frames_read = 0;

    // Process the audio chunk by chunk; a short read ends the loop.
    while (ReadInt16FromFileToFloatBuffer(audio_file.get(),
                                          buffer_length,
                                          buffer.get()) == buffer_length) {
      ++frames_read;

      float detector_value =
          detector.Detect(buffer.get(), buffer_length, NULL, 0);
      double file_value;
      ASSERT_EQ(1u, ReadDoubleBufferFromFile(detect_file.get(), 1, &file_value))
          << "Detect test file is malformed.\n";

      // Compare results with data from the matlab test file.
      EXPECT_NEAR(file_value, detector_value, kTolerance) << "Frame: "
          << frames_read;
    }

    detect_file->CloseFile();
    audio_file->CloseFile();
  }
}
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,250 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/transient/transient_suppressor.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string>
|
||||
|
||||
#include "gflags/gflags.h"
|
||||
#include "testing/gtest/include/gtest/gtest.h"
|
||||
#include "webrtc/common_audio/include/audio_util.h"
|
||||
#include "webrtc/modules/audio_processing/agc/agc.h"
|
||||
#include "webrtc/modules/interface/module_common_types.h"
|
||||
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
|
||||
#include "webrtc/test/testsupport/fileutils.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
// Paths of the PCM files used by this tool. void_main() only opens the
// detection and reference files when their names are non-empty.
DEFINE_string(in_file_name, "", "PCM file that contains the signal.");
DEFINE_string(detection_file_name, "",
              "PCM file that contains the detection signal.");
DEFINE_string(reference_file_name, "",
              "PCM file that contains the reference signal.");
|
||||
|
||||
// Flag validator: accepts |value| only when it is strictly positive.
// Otherwise it prints a message naming |flagname| to stdout and returns
// false.
static bool ValidatePositiveInt(const char* flagname, int32_t value) {
  const bool is_positive = value > 0;
  if (!is_positive) {
    printf("%s must be a positive integer.\n", flagname);
  }
  return is_positive;
}
|
||||
DEFINE_int32(chunk_size_ms,
|
||||
10,
|
||||
"Time between each chunk of samples in milliseconds.");
|
||||
static const bool chunk_size_ms_dummy =
|
||||
google::RegisterFlagValidator(&FLAGS_chunk_size_ms, &ValidatePositiveInt);
|
||||
|
||||
DEFINE_int32(sample_rate_hz,
|
||||
16000,
|
||||
"Sampling frequency of the signal in Hertz.");
|
||||
static const bool sample_rate_hz_dummy =
|
||||
google::RegisterFlagValidator(&FLAGS_sample_rate_hz, &ValidatePositiveInt);
|
||||
DEFINE_int32(detection_rate_hz,
|
||||
0,
|
||||
"Sampling frequency of the detection signal in Hertz.");
|
||||
|
||||
DEFINE_int32(num_channels, 1, "Number of channels.");
|
||||
static const bool num_channels_dummy =
|
||||
google::RegisterFlagValidator(&FLAGS_num_channels, &ValidatePositiveInt);
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Usage message shown by gflags. It only mentions options that this tool
// actually defines: the voice decision comes from the AGC voice probability
// computed in void_main(), not from a command-line threshold.
const char kUsage[] =
    "\nDetects and suppresses transients from file.\n\n"
    "This application loads the signal from the in_file_name with a specific\n"
    "num_channels and sample_rate_hz, the detection signal from the\n"
    "detection_file_name with a specific detection_rate_hz, and the reference\n"
    "signal from the reference_file_name with sample_rate_hz, divides them\n"
    "into chunk_size_ms blocks, estimates the voice probability of each block\n"
    "and applies the corresponding restoration. The detection and reference\n"
    "files are optional, and detection_rate_hz defaults to sample_rate_hz.\n"
    "The result is written to suppressed_keystrokes.pcm in the output\n"
    "directory.\n\n";
|
||||
|
||||
// Read next buffers from the test files (signed 16-bit host-endian PCM
|
||||
// format). audio_buffer has int16 samples, detection_buffer has float samples
|
||||
// with range [-32768,32767], and reference_buffer has float samples with range
|
||||
// [-1,1]. Return true iff all the buffers were filled completely.
|
||||
bool ReadBuffers(FILE* in_file,
|
||||
size_t audio_buffer_size,
|
||||
int num_channels,
|
||||
int16_t* audio_buffer,
|
||||
FILE* detection_file,
|
||||
size_t detection_buffer_size,
|
||||
float* detection_buffer,
|
||||
FILE* reference_file,
|
||||
float* reference_buffer) {
|
||||
scoped_ptr<int16_t[]> tmpbuf;
|
||||
int16_t* read_ptr = audio_buffer;
|
||||
if (num_channels > 1) {
|
||||
tmpbuf.reset(new int16_t[num_channels * audio_buffer_size]);
|
||||
read_ptr = tmpbuf.get();
|
||||
}
|
||||
if (fread(read_ptr,
|
||||
sizeof(*read_ptr),
|
||||
num_channels * audio_buffer_size,
|
||||
in_file) != num_channels * audio_buffer_size) {
|
||||
return false;
|
||||
}
|
||||
// De-interleave.
|
||||
if (num_channels > 1) {
|
||||
for (int i = 0; i < num_channels; ++i) {
|
||||
for (size_t j = 0; j < audio_buffer_size; ++j) {
|
||||
audio_buffer[i * audio_buffer_size + j] =
|
||||
read_ptr[i + j * num_channels];
|
||||
}
|
||||
}
|
||||
}
|
||||
if (detection_file) {
|
||||
scoped_ptr<int16_t[]> ibuf(new int16_t[detection_buffer_size]);
|
||||
if (fread(ibuf.get(), sizeof(ibuf[0]), detection_buffer_size,
|
||||
detection_file) != detection_buffer_size)
|
||||
return false;
|
||||
for (size_t i = 0; i < detection_buffer_size; ++i)
|
||||
detection_buffer[i] = ibuf[i];
|
||||
}
|
||||
if (reference_file) {
|
||||
scoped_ptr<int16_t[]> ibuf(new int16_t[audio_buffer_size]);
|
||||
if (fread(ibuf.get(), sizeof(ibuf[0]), audio_buffer_size, reference_file)
|
||||
!= audio_buffer_size)
|
||||
return false;
|
||||
S16ToFloat(ibuf.get(), audio_buffer_size, reference_buffer);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Write a number of samples to an open signed 16-bit host-endian PCM file.
|
||||
static void WritePCM(FILE* f,
|
||||
size_t num_samples,
|
||||
int num_channels,
|
||||
const float* buffer) {
|
||||
scoped_ptr<int16_t[]> ibuf(new int16_t[num_channels * num_samples]);
|
||||
// Interleave.
|
||||
for (int i = 0; i < num_channels; ++i) {
|
||||
for (size_t j = 0; j < num_samples; ++j) {
|
||||
ibuf[i + j * num_channels] = FloatS16ToS16(buffer[i * num_samples + j]);
|
||||
}
|
||||
}
|
||||
fwrite(ibuf.get(), sizeof(ibuf[0]), num_channels * num_samples, f);
|
||||
}
|
||||
|
||||
// This application tests the transient suppression by providing a processed
|
||||
// PCM file, which has to be listened to in order to evaluate the
|
||||
// performance.
|
||||
// It gets an audio file, and its voice gain information, and the suppressor
|
||||
// processes it, writing the output file "suppressed_keystrokes.pcm".
|
||||
void void_main() {
|
||||
// TODO(aluebs): Remove all FileWrappers.
|
||||
// Prepare the input file.
|
||||
FILE* in_file = fopen(FLAGS_in_file_name.c_str(), "rb");
|
||||
ASSERT_TRUE(in_file != NULL);
|
||||
|
||||
// Prepare the detection file.
|
||||
FILE* detection_file = NULL;
|
||||
if (FLAGS_detection_file_name != "") {
|
||||
detection_file = fopen(FLAGS_detection_file_name.c_str(), "rb");
|
||||
}
|
||||
|
||||
// Prepare the reference file.
|
||||
FILE* reference_file = NULL;
|
||||
if (FLAGS_reference_file_name != "") {
|
||||
reference_file = fopen(FLAGS_reference_file_name.c_str(), "rb");
|
||||
}
|
||||
|
||||
// Prepare the output file.
|
||||
std::string out_file_name = test::OutputPath() + "suppressed_keystrokes.pcm";
|
||||
FILE* out_file = fopen(out_file_name.c_str(), "wb");
|
||||
ASSERT_TRUE(out_file != NULL);
|
||||
|
||||
int detection_rate_hz = FLAGS_detection_rate_hz;
|
||||
if (detection_rate_hz == 0) {
|
||||
detection_rate_hz = FLAGS_sample_rate_hz;
|
||||
}
|
||||
|
||||
Agc agc;
|
||||
|
||||
TransientSuppressor suppressor;
|
||||
suppressor.Initialize(
|
||||
FLAGS_sample_rate_hz, detection_rate_hz, FLAGS_num_channels);
|
||||
|
||||
const size_t audio_buffer_size =
|
||||
FLAGS_chunk_size_ms * FLAGS_sample_rate_hz / 1000;
|
||||
const size_t detection_buffer_size =
|
||||
FLAGS_chunk_size_ms * detection_rate_hz / 1000;
|
||||
|
||||
// int16 and float variants of the same data.
|
||||
scoped_ptr<int16_t[]> audio_buffer_i(
|
||||
new int16_t[FLAGS_num_channels * audio_buffer_size]);
|
||||
scoped_ptr<float[]> audio_buffer_f(
|
||||
new float[FLAGS_num_channels * audio_buffer_size]);
|
||||
|
||||
scoped_ptr<float[]> detection_buffer, reference_buffer;
|
||||
|
||||
if (detection_file)
|
||||
detection_buffer.reset(new float[detection_buffer_size]);
|
||||
if (reference_file)
|
||||
reference_buffer.reset(new float[audio_buffer_size]);
|
||||
|
||||
while (ReadBuffers(in_file,
|
||||
audio_buffer_size,
|
||||
FLAGS_num_channels,
|
||||
audio_buffer_i.get(),
|
||||
detection_file,
|
||||
detection_buffer_size,
|
||||
detection_buffer.get(),
|
||||
reference_file,
|
||||
reference_buffer.get())) {
|
||||
ASSERT_EQ(0,
|
||||
agc.Process(audio_buffer_i.get(),
|
||||
static_cast<int>(audio_buffer_size),
|
||||
FLAGS_sample_rate_hz))
|
||||
<< "The AGC could not process the frame";
|
||||
|
||||
for (size_t i = 0; i < FLAGS_num_channels * audio_buffer_size; ++i) {
|
||||
audio_buffer_f[i] = audio_buffer_i[i];
|
||||
}
|
||||
|
||||
ASSERT_EQ(0,
|
||||
suppressor.Suppress(audio_buffer_f.get(),
|
||||
audio_buffer_size,
|
||||
FLAGS_num_channels,
|
||||
detection_buffer.get(),
|
||||
detection_buffer_size,
|
||||
reference_buffer.get(),
|
||||
audio_buffer_size,
|
||||
agc.voice_probability(),
|
||||
true))
|
||||
<< "The transient suppressor could not suppress the frame";
|
||||
|
||||
// Write result to out file.
|
||||
WritePCM(
|
||||
out_file, audio_buffer_size, FLAGS_num_channels, audio_buffer_f.get());
|
||||
}
|
||||
|
||||
fclose(in_file);
|
||||
if (detection_file) {
|
||||
fclose(detection_file);
|
||||
}
|
||||
if (reference_file) {
|
||||
fclose(reference_file);
|
||||
}
|
||||
fclose(out_file);
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
// Entry point: registers the usage text, parses the command-line flags
// (removing them from argv) and runs the tool.
int main(int argc, char* argv[]) {
  google::SetUsageMessage(webrtc::kUsage);
  google::ParseCommandLineFlags(&argc, &argv, true);

  webrtc::void_main();

  // The exit status is 0 regardless of what void_main() reported.
  return 0;
}
|
@ -0,0 +1,424 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/transient/transient_suppressor.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include <cmath>
|
||||
#include <complex>
|
||||
#include <deque>
|
||||
#include <set>
|
||||
|
||||
#include "webrtc/common_audio/include/audio_util.h"
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
#include "webrtc/modules/audio_processing/transient/common.h"
|
||||
#include "webrtc/modules/audio_processing/transient/transient_detector.h"
|
||||
#include "webrtc/modules/audio_processing/ns/windows_private.h"
|
||||
extern "C" {
|
||||
#include "webrtc/modules/audio_processing/utility/fft4g.h"
|
||||
}
|
||||
#include "webrtc/system_wrappers/interface/logging.h"
|
||||
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Weight given to the newest magnitudes when the spectral mean is updated.
static const float kMeanIIRCoefficient = 0.5f;
// Voice probabilities below this value are treated as "not voiced".
static const float kVoiceThreshold = 0.02f;

// Spectrum bins delimiting the range used for the block frequency mean and
// for the centres of the mean-factor sigmoids.
// TODO(aluebs): Check if these values work also for 48kHz.
static const size_t kMinVoiceBin = 3;
static const size_t kMaxVoiceBin = 60;
|
||||
|
||||
namespace {
|
||||
// Returns |a| + |b| for the complex value a + bi. Note that this is the sum
// of the absolute parts, not the Euclidean modulus sqrt(a^2 + b^2).
float ComplexMagnitude(float a, float b) {
  const float abs_real = std::fabs(a);
  const float abs_imag = std::fabs(b);
  return abs_real + abs_imag;
}
|
||||
}
|
||||
|
||||
// Builds a suppressor with zero lengths, no window and no buffers. The
// lengths, buffers and detector are only set up by Initialize().
TransientSuppressor::TransientSuppressor()
    : data_length_(0),
      detection_length_(0),
      analysis_length_(0),
      buffer_delay_(0),
      complex_analysis_length_(0),
      num_channels_(0),
      window_(NULL),
      detector_smoothed_(0.f),
      keypress_counter_(0),
      chunks_since_keypress_(0),
      detection_enabled_(false),
      suppression_enabled_(false),
      use_hard_restoration_(false),
      chunks_since_voice_change_(0),
      seed_(182),
      using_reference_(false) {}

// Nothing to release by hand: the buffers and the detector are held in
// scoped_ptr members.
TransientSuppressor::~TransientSuppressor() {}
|
||||
|
||||
int TransientSuppressor::Initialize(int sample_rate_hz,
|
||||
int detection_rate_hz,
|
||||
int num_channels) {
|
||||
switch (sample_rate_hz) {
|
||||
case ts::kSampleRate8kHz:
|
||||
analysis_length_ = 128u;
|
||||
window_ = kBlocks80w128;
|
||||
break;
|
||||
case ts::kSampleRate16kHz:
|
||||
analysis_length_ = 256u;
|
||||
window_ = kBlocks160w256;
|
||||
break;
|
||||
case ts::kSampleRate32kHz:
|
||||
analysis_length_ = 512u;
|
||||
window_ = kBlocks320w512;
|
||||
break;
|
||||
case ts::kSampleRate48kHz:
|
||||
analysis_length_ = 1024u;
|
||||
window_ = kBlocks480w1024;
|
||||
break;
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
if (detection_rate_hz != ts::kSampleRate8kHz &&
|
||||
detection_rate_hz != ts::kSampleRate16kHz &&
|
||||
detection_rate_hz != ts::kSampleRate32kHz &&
|
||||
detection_rate_hz != ts::kSampleRate48kHz) {
|
||||
return -1;
|
||||
}
|
||||
if (num_channels <= 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
detector_.reset(new TransientDetector(detection_rate_hz));
|
||||
data_length_ = sample_rate_hz * ts::kChunkSizeMs / 1000;
|
||||
if (data_length_ > analysis_length_) {
|
||||
assert(false);
|
||||
return -1;
|
||||
}
|
||||
buffer_delay_ = analysis_length_ - data_length_;
|
||||
|
||||
complex_analysis_length_ = analysis_length_ / 2 + 1;
|
||||
assert(complex_analysis_length_ >= kMaxVoiceBin);
|
||||
num_channels_ = num_channels;
|
||||
in_buffer_.reset(new float[analysis_length_ * num_channels_]);
|
||||
memset(in_buffer_.get(),
|
||||
0,
|
||||
analysis_length_ * num_channels_ * sizeof(in_buffer_[0]));
|
||||
detection_length_ = detection_rate_hz * ts::kChunkSizeMs / 1000;
|
||||
detection_buffer_.reset(new float[detection_length_]);
|
||||
memset(detection_buffer_.get(),
|
||||
0,
|
||||
detection_length_ * sizeof(detection_buffer_[0]));
|
||||
out_buffer_.reset(new float[analysis_length_ * num_channels_]);
|
||||
memset(out_buffer_.get(),
|
||||
0,
|
||||
analysis_length_ * num_channels_ * sizeof(out_buffer_[0]));
|
||||
// ip[0] must be zero to trigger initialization using rdft().
|
||||
size_t ip_length = 2 + sqrtf(analysis_length_);
|
||||
ip_.reset(new int[ip_length]());
|
||||
memset(ip_.get(), 0, ip_length * sizeof(ip_[0]));
|
||||
wfft_.reset(new float[complex_analysis_length_ - 1]);
|
||||
memset(wfft_.get(), 0, (complex_analysis_length_ - 1) * sizeof(wfft_[0]));
|
||||
spectral_mean_.reset(new float[complex_analysis_length_ * num_channels_]);
|
||||
memset(spectral_mean_.get(),
|
||||
0,
|
||||
complex_analysis_length_ * num_channels_ * sizeof(spectral_mean_[0]));
|
||||
fft_buffer_.reset(new float[analysis_length_ + 2]);
|
||||
memset(fft_buffer_.get(), 0, (analysis_length_ + 2) * sizeof(fft_buffer_[0]));
|
||||
magnitudes_.reset(new float[complex_analysis_length_]);
|
||||
memset(magnitudes_.get(),
|
||||
0,
|
||||
complex_analysis_length_ * sizeof(magnitudes_[0]));
|
||||
mean_factor_.reset(new float[complex_analysis_length_]);
|
||||
|
||||
static const float kFactorHeight = 10.f;
|
||||
static const float kLowSlope = 1.f;
|
||||
static const float kHighSlope = 0.3f;
|
||||
for (size_t i = 0; i < complex_analysis_length_; ++i) {
|
||||
mean_factor_[i] =
|
||||
kFactorHeight /
|
||||
(1.f + exp(kLowSlope * static_cast<int>(i - kMinVoiceBin))) +
|
||||
kFactorHeight /
|
||||
(1.f + exp(kHighSlope * static_cast<int>(kMaxVoiceBin - i)));
|
||||
}
|
||||
detector_smoothed_ = 0.f;
|
||||
keypress_counter_ = 0;
|
||||
chunks_since_keypress_ = 0;
|
||||
detection_enabled_ = false;
|
||||
suppression_enabled_ = false;
|
||||
use_hard_restoration_ = false;
|
||||
chunks_since_voice_change_ = 0;
|
||||
seed_ = 182;
|
||||
using_reference_ = false;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int TransientSuppressor::Suppress(float* data,
|
||||
size_t data_length,
|
||||
int num_channels,
|
||||
const float* detection_data,
|
||||
size_t detection_length,
|
||||
const float* reference_data,
|
||||
size_t reference_length,
|
||||
float voice_probability,
|
||||
bool key_pressed) {
|
||||
if (!data || data_length != data_length_ || num_channels != num_channels_ ||
|
||||
detection_length != detection_length_ || voice_probability < 0 ||
|
||||
voice_probability > 1) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
UpdateKeypress(key_pressed);
|
||||
UpdateBuffers(data);
|
||||
|
||||
int result = 0;
|
||||
if (detection_enabled_) {
|
||||
UpdateRestoration(voice_probability);
|
||||
|
||||
if (!detection_data) {
|
||||
// Use the input data of the first channel if special detection data is
|
||||
// not supplied.
|
||||
detection_data = &in_buffer_[buffer_delay_];
|
||||
}
|
||||
|
||||
float detector_result = detector_->Detect(
|
||||
detection_data, detection_length, reference_data, reference_length);
|
||||
if (detector_result < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
using_reference_ = detector_->using_reference();
|
||||
|
||||
// |detector_smoothed_| follows the |detector_result| when this last one is
|
||||
// increasing, but has an exponential decaying tail to be able to suppress
|
||||
// the ringing of keyclicks.
|
||||
float smooth_factor = using_reference_ ? 0.6 : 0.1;
|
||||
detector_smoothed_ = detector_result >= detector_smoothed_
|
||||
? detector_result
|
||||
: smooth_factor * detector_smoothed_ +
|
||||
(1 - smooth_factor) * detector_result;
|
||||
|
||||
for (int i = 0; i < num_channels_; ++i) {
|
||||
Suppress(&in_buffer_[i * analysis_length_],
|
||||
&spectral_mean_[i * complex_analysis_length_],
|
||||
&out_buffer_[i * analysis_length_]);
|
||||
}
|
||||
}
|
||||
|
||||
// If the suppression isn't enabled, we use the in buffer to delay the signal
|
||||
// appropriately. This also gives time for the out buffer to be refreshed with
|
||||
// new data between detection and suppression getting enabled.
|
||||
for (int i = 0; i < num_channels_; ++i) {
|
||||
memcpy(&data[i * data_length_],
|
||||
suppression_enabled_ ? &out_buffer_[i * analysis_length_]
|
||||
: &in_buffer_[i * analysis_length_],
|
||||
data_length_ * sizeof(*data));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// This should only be called when detection is enabled. UpdateBuffers() must
|
||||
// have been called. At return, |out_buffer_| will be filled with the
|
||||
// processed output.
|
||||
void TransientSuppressor::Suppress(float* in_ptr,
|
||||
float* spectral_mean,
|
||||
float* out_ptr) {
|
||||
// Go to frequency domain.
|
||||
for (size_t i = 0; i < analysis_length_; ++i) {
|
||||
// TODO(aluebs): Rename windows
|
||||
fft_buffer_[i] = in_ptr[i] * window_[i];
|
||||
}
|
||||
|
||||
WebRtc_rdft(analysis_length_, 1, fft_buffer_.get(), ip_.get(), wfft_.get());
|
||||
|
||||
// Since WebRtc_rdft puts R[n/2] in fft_buffer_[1], we move it to the end
|
||||
// for convenience.
|
||||
fft_buffer_[analysis_length_] = fft_buffer_[1];
|
||||
fft_buffer_[analysis_length_ + 1] = 0.f;
|
||||
fft_buffer_[1] = 0.f;
|
||||
|
||||
for (size_t i = 0; i < complex_analysis_length_; ++i) {
|
||||
magnitudes_[i] = ComplexMagnitude(fft_buffer_[i * 2],
|
||||
fft_buffer_[i * 2 + 1]);
|
||||
}
|
||||
// Restore audio if necessary.
|
||||
if (suppression_enabled_) {
|
||||
if (use_hard_restoration_) {
|
||||
HardRestoration(spectral_mean);
|
||||
} else {
|
||||
SoftRestoration(spectral_mean);
|
||||
}
|
||||
}
|
||||
|
||||
// Update the spectral mean.
|
||||
for (size_t i = 0; i < complex_analysis_length_; ++i) {
|
||||
spectral_mean[i] = (1 - kMeanIIRCoefficient) * spectral_mean[i] +
|
||||
kMeanIIRCoefficient * magnitudes_[i];
|
||||
}
|
||||
|
||||
// Back to time domain.
|
||||
// Put R[n/2] back in fft_buffer_[1].
|
||||
fft_buffer_[1] = fft_buffer_[analysis_length_];
|
||||
|
||||
WebRtc_rdft(analysis_length_,
|
||||
-1,
|
||||
fft_buffer_.get(),
|
||||
ip_.get(),
|
||||
wfft_.get());
|
||||
const float fft_scaling = 2.f / analysis_length_;
|
||||
|
||||
for (size_t i = 0; i < analysis_length_; ++i) {
|
||||
out_ptr[i] += fft_buffer_[i] * window_[i] * fft_scaling;
|
||||
}
|
||||
}
|
||||
|
||||
void TransientSuppressor::UpdateKeypress(bool key_pressed) {
|
||||
const int kKeypressPenalty = 1000 / ts::kChunkSizeMs;
|
||||
const int kIsTypingThreshold = 1000 / ts::kChunkSizeMs;
|
||||
const int kChunksUntilNotTyping = 4000 / ts::kChunkSizeMs; // 4 seconds.
|
||||
|
||||
if (key_pressed) {
|
||||
keypress_counter_ += kKeypressPenalty;
|
||||
chunks_since_keypress_ = 0;
|
||||
detection_enabled_ = true;
|
||||
}
|
||||
keypress_counter_ = std::max(0, keypress_counter_ - 1);
|
||||
|
||||
if (keypress_counter_ > kIsTypingThreshold) {
|
||||
if (!suppression_enabled_) {
|
||||
LOG(LS_INFO) << "[ts] Transient suppression is now enabled.";
|
||||
}
|
||||
suppression_enabled_ = true;
|
||||
keypress_counter_ = 0;
|
||||
}
|
||||
|
||||
if (detection_enabled_ &&
|
||||
++chunks_since_keypress_ > kChunksUntilNotTyping) {
|
||||
if (suppression_enabled_) {
|
||||
LOG(LS_INFO) << "[ts] Transient suppression is now disabled.";
|
||||
}
|
||||
detection_enabled_ = false;
|
||||
suppression_enabled_ = false;
|
||||
keypress_counter_ = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void TransientSuppressor::UpdateRestoration(float voice_probability) {
|
||||
const int kHardRestorationOffsetDelay = 3;
|
||||
const int kHardRestorationOnsetDelay = 80;
|
||||
|
||||
bool not_voiced = voice_probability < kVoiceThreshold;
|
||||
|
||||
if (not_voiced == use_hard_restoration_) {
|
||||
chunks_since_voice_change_ = 0;
|
||||
} else {
|
||||
++chunks_since_voice_change_;
|
||||
|
||||
if ((use_hard_restoration_ &&
|
||||
chunks_since_voice_change_ > kHardRestorationOffsetDelay) ||
|
||||
(!use_hard_restoration_ &&
|
||||
chunks_since_voice_change_ > kHardRestorationOnsetDelay)) {
|
||||
use_hard_restoration_ = not_voiced;
|
||||
chunks_since_voice_change_ = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Shift buffers to make way for new data. Must be called after
|
||||
// |detection_enabled_| is updated by UpdateKeypress().
|
||||
void TransientSuppressor::UpdateBuffers(float* data) {
|
||||
// TODO(aluebs): Change to ring buffer.
|
||||
memmove(in_buffer_.get(),
|
||||
&in_buffer_[data_length_],
|
||||
(buffer_delay_ + (num_channels_ - 1) * analysis_length_) *
|
||||
sizeof(in_buffer_[0]));
|
||||
// Copy new chunk to buffer.
|
||||
for (int i = 0; i < num_channels_; ++i) {
|
||||
memcpy(&in_buffer_[buffer_delay_ + i * analysis_length_],
|
||||
&data[i * data_length_],
|
||||
data_length_ * sizeof(*data));
|
||||
}
|
||||
if (detection_enabled_) {
|
||||
// Shift previous chunk in out buffer.
|
||||
memmove(out_buffer_.get(),
|
||||
&out_buffer_[data_length_],
|
||||
(buffer_delay_ + (num_channels_ - 1) * analysis_length_) *
|
||||
sizeof(out_buffer_[0]));
|
||||
// Initialize new chunk in out buffer.
|
||||
for (int i = 0; i < num_channels_; ++i) {
|
||||
memset(&out_buffer_[buffer_delay_ + i * analysis_length_],
|
||||
0,
|
||||
data_length_ * sizeof(out_buffer_[0]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Restores the unvoiced signal if a click is present.
|
||||
// Attenuates by a certain factor every peak in the |fft_buffer_| that exceeds
|
||||
// the spectral mean. The attenuation depends on |detector_smoothed_|.
|
||||
// If a restoration takes place, the |magnitudes_| are updated to the new value.
|
||||
void TransientSuppressor::HardRestoration(float* spectral_mean) {
|
||||
const float detector_result =
|
||||
1.f - pow(1.f - detector_smoothed_, using_reference_ ? 200.f : 50.f);
|
||||
// To restore, we get the peaks in the spectrum. If higher than the previous
|
||||
// spectral mean we adjust them.
|
||||
for (size_t i = 0; i < complex_analysis_length_; ++i) {
|
||||
if (magnitudes_[i] > spectral_mean[i] && magnitudes_[i] > 0) {
|
||||
// RandU() generates values on [0, int16::max()]
|
||||
const float phase = 2 * ts::kPi * WebRtcSpl_RandU(&seed_) /
|
||||
std::numeric_limits<int16_t>::max();
|
||||
const float scaled_mean = detector_result * spectral_mean[i];
|
||||
|
||||
fft_buffer_[i * 2] = (1 - detector_result) * fft_buffer_[i * 2] +
|
||||
scaled_mean * cosf(phase);
|
||||
fft_buffer_[i * 2 + 1] = (1 - detector_result) * fft_buffer_[i * 2 + 1] +
|
||||
scaled_mean * sinf(phase);
|
||||
magnitudes_[i] = magnitudes_[i] -
|
||||
detector_result * (magnitudes_[i] - spectral_mean[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Restores the voiced signal if a click is present.
|
||||
// Attenuates by a certain factor every peak in the |fft_buffer_| that exceeds
|
||||
// the spectral mean and that is lower than some function of the current block
|
||||
// frequency mean. The attenuation depends on |detector_smoothed_|.
|
||||
// If a restoration takes place, the |magnitudes_| are updated to the new value.
|
||||
void TransientSuppressor::SoftRestoration(float* spectral_mean) {
|
||||
// Get the spectral magnitude mean of the current block.
|
||||
float block_frequency_mean = 0;
|
||||
for (size_t i = kMinVoiceBin; i < kMaxVoiceBin; ++i) {
|
||||
block_frequency_mean += magnitudes_[i];
|
||||
}
|
||||
block_frequency_mean /= (kMaxVoiceBin - kMinVoiceBin);
|
||||
|
||||
// To restore, we get the peaks in the spectrum. If higher than the
|
||||
// previous spectral mean and lower than a factor of the block mean
|
||||
// we adjust them. The factor is a double sigmoid that has a minimum in the
|
||||
// voice frequency range (300Hz - 3kHz).
|
||||
for (size_t i = 0; i < complex_analysis_length_; ++i) {
|
||||
if (magnitudes_[i] > spectral_mean[i] && magnitudes_[i] > 0 &&
|
||||
(using_reference_ ||
|
||||
magnitudes_[i] < block_frequency_mean * mean_factor_[i])) {
|
||||
const float new_magnitude =
|
||||
magnitudes_[i] -
|
||||
detector_smoothed_ * (magnitudes_[i] - spectral_mean[i]);
|
||||
const float magnitude_ratio = new_magnitude / magnitudes_[i];
|
||||
|
||||
fft_buffer_[i * 2] *= magnitude_ratio;
|
||||
fft_buffer_[i * 2 + 1] *= magnitude_ratio;
|
||||
magnitudes_[i] = new_magnitude;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
120
webrtc/modules/audio_processing/transient/transient_suppressor.h
Normal file
120
webrtc/modules/audio_processing/transient/transient_suppressor.h
Normal file
@ -0,0 +1,120 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
|
||||
|
||||
#include <deque>
|
||||
#include <set>
|
||||
|
||||
#include "webrtc/system_wrappers/interface/scoped_ptr.h"
|
||||
#include "webrtc/test/testsupport/gtest_prod_util.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class TransientDetector;
|
||||
|
||||
// Detects transients in an audio stream and suppresses them using a simple
|
||||
// restoration algorithm that attenuates unexpected spikes in the spectrum.
|
||||
class TransientSuppressor {
|
||||
public:
|
||||
TransientSuppressor();
|
||||
~TransientSuppressor();
|
||||
|
||||
int Initialize(int sample_rate_hz, int detector_rate_hz, int num_channels);
|
||||
|
||||
// Processes a |data| chunk, and returns it with keystrokes suppressed from
|
||||
// it. The float format is assumed to be int16 ranged. If there are more than
|
||||
// one channel, the chunks are concatenated one after the other in |data|.
|
||||
// |data_length| must be equal to |data_length_|.
|
||||
// |num_channels| must be equal to |num_channels_|.
|
||||
// A sub-band, ideally the higher, can be used as |detection_data|. If it is
|
||||
// NULL, |data| is used for the detection too. The |detection_data| is always
|
||||
// assumed mono.
|
||||
// If a reference signal (e.g. keyboard microphone) is available, it can be
|
||||
// passed in as |reference_data|. It is assumed mono and must have the same
|
||||
// length as |data|. NULL is accepted if unavailable.
|
||||
// This suppressor performs better if voice information is available.
|
||||
// |voice_probability| is the probability of voice being present in this chunk
|
||||
// of audio. If voice information is not available, |voice_probability| must
|
||||
// always be set to 1.
|
||||
// |key_pressed| determines if a key was pressed on this audio chunk.
|
||||
// Returns 0 on success and -1 otherwise.
|
||||
int Suppress(float* data,
|
||||
size_t data_length,
|
||||
int num_channels,
|
||||
const float* detection_data,
|
||||
size_t detection_length,
|
||||
const float* reference_data,
|
||||
size_t reference_length,
|
||||
float voice_probability,
|
||||
bool key_pressed);
|
||||
|
||||
private:
|
||||
FRIEND_TEST_ALL_PREFIXES(TransientSuppressorTest,
|
||||
TypingDetectionLogicWorksAsExpectedForMono);
|
||||
void Suppress(float* in_ptr, float* spectral_mean, float* out_ptr);
|
||||
|
||||
void UpdateKeypress(bool key_pressed);
|
||||
void UpdateRestoration(float voice_probability);
|
||||
|
||||
void UpdateBuffers(float* data);
|
||||
|
||||
void HardRestoration(float* spectral_mean);
|
||||
void SoftRestoration(float* spectral_mean);
|
||||
|
||||
scoped_ptr<TransientDetector> detector_;
|
||||
|
||||
size_t data_length_;
|
||||
size_t detection_length_;
|
||||
size_t analysis_length_;
|
||||
size_t buffer_delay_;
|
||||
size_t complex_analysis_length_;
|
||||
int num_channels_;
|
||||
// Input buffer where the original samples are stored.
|
||||
scoped_ptr<float[]> in_buffer_;
|
||||
scoped_ptr<float[]> detection_buffer_;
|
||||
// Output buffer where the restored samples are stored.
|
||||
scoped_ptr<float[]> out_buffer_;
|
||||
|
||||
// Arrays for fft.
|
||||
scoped_ptr<int[]> ip_;
|
||||
scoped_ptr<float[]> wfft_;
|
||||
|
||||
scoped_ptr<float[]> spectral_mean_;
|
||||
|
||||
// Stores the data for the fft.
|
||||
scoped_ptr<float[]> fft_buffer_;
|
||||
|
||||
scoped_ptr<float[]> magnitudes_;
|
||||
|
||||
const float* window_;
|
||||
|
||||
scoped_ptr<float[]> mean_factor_;
|
||||
|
||||
float detector_smoothed_;
|
||||
|
||||
int keypress_counter_;
|
||||
int chunks_since_keypress_;
|
||||
bool detection_enabled_;
|
||||
bool suppression_enabled_;
|
||||
|
||||
bool use_hard_restoration_;
|
||||
int chunks_since_voice_change_;
|
||||
|
||||
uint32_t seed_;
|
||||
|
||||
bool using_reference_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
|
@ -0,0 +1,85 @@
|
||||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/transient/transient_suppressor.h"
|
||||
|
||||
#include "testing/gtest/include/gtest/gtest.h"
|
||||
#include "webrtc/modules/audio_processing/transient/common.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Feeds a mono TransientSuppressor a scripted sequence of UpdateKeypress()
// calls and checks |detection_enabled_| and |suppression_enabled_| along the
// way.
TEST(TransientSuppressorTest, TypingDetectionLogicWorksAsExpectedForMono) {
  static const int kNumChannels = 1;

  TransientSuppressor suppressor;
  // NOTE(review): the return value of Initialize() is not checked here --
  // confirm whether a failure should abort the test.
  suppressor.Initialize(
      ts::kSampleRate16kHz, ts::kSampleRate16kHz, kNumChannels);

  // Detection starts disabled; a single key-press turns it on.
  EXPECT_FALSE(suppressor.detection_enabled_);
  suppressor.UpdateKeypress(true);
  EXPECT_TRUE(suppressor.detection_enabled_);

  // Four seconds without any key-press are needed before detection is
  // switched off again.
  for (int elapsed_ms = 0; elapsed_ms < 3990;
       elapsed_ms += ts::kChunkSizeMs) {
    suppressor.UpdateKeypress(false);
    EXPECT_TRUE(suppressor.detection_enabled_);
  }
  suppressor.UpdateKeypress(false);
  EXPECT_FALSE(suppressor.detection_enabled_);

  // Key-presses spaced more than a second apart from each other do not turn
  // suppression on.
  for (int round = 0; round < 100; ++round) {
    EXPECT_FALSE(suppressor.suppression_enabled_);
    suppressor.UpdateKeypress(true);
    EXPECT_TRUE(suppressor.detection_enabled_);
    EXPECT_FALSE(suppressor.suppression_enabled_);
    for (int elapsed_ms = 0; elapsed_ms < 990;
         elapsed_ms += ts::kChunkSizeMs) {
      suppressor.UpdateKeypress(false);
      EXPECT_TRUE(suppressor.detection_enabled_);
      EXPECT_FALSE(suppressor.suppression_enabled_);
    }
    suppressor.UpdateKeypress(false);
  }

  // Two key-presses in a row are enough to turn suppression on.
  suppressor.UpdateKeypress(true);
  EXPECT_FALSE(suppressor.suppression_enabled_);
  suppressor.UpdateKeypress(true);
  EXPECT_TRUE(suppressor.suppression_enabled_);

  // With a key-press arriving after every 1000 ms run of key-less chunks,
  // neither detection nor suppression gets disabled.
  for (int round = 0; round < 100; ++round) {
    for (int elapsed_ms = 0; elapsed_ms < 1000;
         elapsed_ms += ts::kChunkSizeMs) {
      suppressor.UpdateKeypress(false);
      EXPECT_TRUE(suppressor.detection_enabled_);
      EXPECT_TRUE(suppressor.suppression_enabled_);
    }
    suppressor.UpdateKeypress(true);
    EXPECT_TRUE(suppressor.detection_enabled_);
    EXPECT_TRUE(suppressor.suppression_enabled_);
  }

  // Four seconds without any key-press switch off both detection and
  // suppression, and they stay off while no further key is pressed.
  for (int elapsed_ms = 0; elapsed_ms < 3990;
       elapsed_ms += ts::kChunkSizeMs) {
    suppressor.UpdateKeypress(false);
    EXPECT_TRUE(suppressor.detection_enabled_);
    EXPECT_TRUE(suppressor.suppression_enabled_);
  }
  for (int elapsed_ms = 0; elapsed_ms < 1000;
       elapsed_ms += ts::kChunkSizeMs) {
    suppressor.UpdateKeypress(false);
    EXPECT_FALSE(suppressor.detection_enabled_);
    EXPECT_FALSE(suppressor.suppression_enabled_);
  }
}
|
||||
|
||||
} // namespace webrtc
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user