From a7f77720cb9f786708b851646e653b5949935e32 Mon Sep 17 00:00:00 2001
From: "pbos@webrtc.org" <pbos@webrtc.org>
Date: Mon, 15 Dec 2014 16:33:16 +0000
Subject: [PATCH] Merge in AGC manager and AGC tools.

R=bjornv@webrtc.org
BUG=4098

Review URL: https://webrtc-codereview.appspot.com/37379004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@7902 4adac7df-926f-26a2-2b94-8c16560cd09d
---
 .../agc/test/activity_metric.cc               | 384 ++++++++++++++++++
 .../audio_processing/agc/test/agc_harness.cc  | 286 +++++++++++++
 .../audio_processing/agc/test/agc_manager.cc  | 252 ++++++++++++
 .../audio_processing/agc/test/agc_manager.h   |  81 ++++
 .../audio_processing/agc/test/agc_test.cc     | 155 +++++++
 webrtc/tools/tools.gyp                        |  56 +++
 webrtc/webrtc.gyp                             |  12 +-
 7 files changed, 1220 insertions(+), 6 deletions(-)
 create mode 100644 webrtc/modules/audio_processing/agc/test/activity_metric.cc
 create mode 100644 webrtc/modules/audio_processing/agc/test/agc_harness.cc
 create mode 100644 webrtc/modules/audio_processing/agc/test/agc_manager.cc
 create mode 100644 webrtc/modules/audio_processing/agc/test/agc_manager.h
 create mode 100644 webrtc/modules/audio_processing/agc/test/agc_test.cc

diff --git a/webrtc/modules/audio_processing/agc/test/activity_metric.cc b/webrtc/modules/audio_processing/agc/test/activity_metric.cc
new file mode 100644
index 000000000..474b5534e
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/test/activity_metric.cc
@@ -0,0 +1,384 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <algorithm>
+
+#include "gflags/gflags.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "webrtc/modules/audio_processing/agc/agc.h"
+#include "webrtc/modules/audio_processing/agc/agc_audio_proc.h"
+#include "webrtc/modules/audio_processing/agc/common.h"
+#include "webrtc/modules/audio_processing/agc/histogram.h"
+#include "webrtc/modules/audio_processing/agc/pitch_based_vad.h"
+#include "webrtc/modules/audio_processing/agc/standalone_vad.h"
+#include "webrtc/modules/audio_processing/agc/utility.h"
+#include "webrtc/modules/interface/module_common_types.h"
+
+static const int kAgcAnalWindowSamples = 100;
+static const double kDefaultActivityThreshold = 0.3;
+
+DEFINE_bool(standalone_vad, true, "enable stand-alone VAD");
+DEFINE_string(true_vad, "", "name of a file containing true VAD in 'int'"
+              " format");
+DEFINE_string(video_vad, "", "name of a file containing video VAD (activity"
+              " probabilities) in double format. One activity per 10 ms is"
+              " required. If no file is given, the video information is not"
+              " incorporated. A negative activity is interpreted as video not"
+              " yet being adapted, and the statistics are not computed during"
+              " that learning phase. Note that negative video activities are"
+              " ONLY allowed at the beginning.");
+DEFINE_string(result, "", "name of a file to write the results. The results"
+              " will be appended to the end of the file. This is optional.");
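+// One line of summary statistics is appended to --result per run; the column
+// names are given by the header written in void_main() below.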
This is optional."); +DEFINE_string(audio_content, "", "name of a file where audio content is written" + " to, in double format."); +DEFINE_double(activity_threshold, kDefaultActivityThreshold, + "Activity threshold"); + +namespace webrtc { + +// TODO(turajs) A new CL will be committed soon where ExtractFeatures will +// notify the caller of "silence" input, instead of bailing out. We would not +// need the following function when such a change is made. + +// Add some dither to quiet frames. This avoids the ExtractFeatures skip a +// silence frame. Otherwise true VAD would drift with respect to the audio. +// We only consider mono inputs. +static void DitherSilence(AudioFrame* frame) { + ASSERT_EQ(1, frame->num_channels_); + const double kRmsSilence = 5; + const double sum_squared_silence = kRmsSilence * kRmsSilence * + frame->samples_per_channel_; + double sum_squared = 0; + for (int n = 0; n < frame->samples_per_channel_; n++) + sum_squared += frame->data_[n] * frame->data_[n]; + if (sum_squared <= sum_squared_silence) { + for (int n = 0; n < frame->samples_per_channel_; n++) + frame->data_[n] = (rand() & 0xF) - 8; + } +} + +class AgcStat { + public: + AgcStat() + : video_index_(0), + activity_threshold_(kDefaultActivityThreshold), + audio_content_(Histogram::Create(kAgcAnalWindowSamples)), + audio_processing_(new AgcAudioProc()), + vad_(new PitchBasedVad()), + standalone_vad_(StandaloneVad::Create()), + audio_content_fid_(NULL) { + for (int n = 0; n < kMaxNumFrames; n++) + video_vad_[n] = 0.5; + } + + ~AgcStat() { + if (audio_content_fid_ != NULL) { + fclose(audio_content_fid_); + } + } + + void set_audio_content_file(FILE* audio_content_fid) { + audio_content_fid_ = audio_content_fid; + } + + int AddAudio(const AudioFrame& frame, double p_video, + int* combined_vad) { + if (frame.num_channels_ != 1 || + frame.samples_per_channel_ != + kSampleRateHz / 100 || + frame.sample_rate_hz_ != kSampleRateHz) + return -1; + video_vad_[video_index_++] = p_video; + AudioFeatures features; + audio_processing_->ExtractFeatures( + frame.data_, frame.samples_per_channel_, &features); + if (FLAGS_standalone_vad) { + standalone_vad_->AddAudio(frame.data_, + frame.samples_per_channel_); + } + if (features.num_frames > 0) { + double p[kMaxNumFrames] = {0.5, 0.5, 0.5, 0.5}; + if (FLAGS_standalone_vad) { + standalone_vad_->GetActivity(p, kMaxNumFrames); + } + // TODO(turajs) combining and limiting are used in the source files as + // well they can be moved to utility. + // Combine Video and stand-alone VAD. + for (int n = 0; n < features.num_frames; n++) { + double p_active = p[n] * video_vad_[n]; + double p_passive = (1 - p[n]) * (1 - video_vad_[n]); + p[n] = p_active / (p_active + p_passive); + // Limit probabilities. 
+        p[n] = std::min(std::max(p[n], 0.01), 0.99);
+      }
+      if (vad_->VoicingProbability(features, p) < 0)
+        return -1;
+      for (int n = 0; n < features.num_frames; n++) {
+        audio_content_->Update(features.rms[n], p[n]);
+        double ac = audio_content_->AudioContent();
+        if (audio_content_fid_ != NULL) {
+          fwrite(&ac, sizeof(ac), 1, audio_content_fid_);
+        }
+        if (ac > kAgcAnalWindowSamples * activity_threshold_) {
+          combined_vad[n] = 1;
+        } else {
+          combined_vad[n] = 0;
+        }
+      }
+      video_index_ = 0;
+    }
+    return features.num_frames;
+  }
+
+  void Reset() {
+    audio_content_->Reset();
+  }
+
+  void SetActivityThreshold(double activity_threshold) {
+    activity_threshold_ = activity_threshold;
+  }
+
+ private:
+  int video_index_;
+  double activity_threshold_;
+  double video_vad_[kMaxNumFrames];
+  scoped_ptr<Histogram> audio_content_;
+  scoped_ptr<AgcAudioProc> audio_processing_;
+  scoped_ptr<PitchBasedVad> vad_;
+  scoped_ptr<StandaloneVad> standalone_vad_;
+
+  FILE* audio_content_fid_;
+};
+
+
+void void_main(int argc, char* argv[]) {
+  webrtc::AgcStat agc_stat;
+
+  if (argc < 2) {
+    fprintf(stderr, "\nNot enough arguments\n");
+    exit(1);
+  }
+
+  FILE* pcm_fid = fopen(argv[1], "rb");
+  ASSERT_TRUE(pcm_fid != NULL) << "Cannot open PCM file " << argv[1];
+
+  FILE* true_vad_fid = NULL;
+  ASSERT_GT(FLAGS_true_vad.size(), 0u) << "Specify the file containing true "
+      "VADs using the --true_vad flag.";
+  true_vad_fid = fopen(FLAGS_true_vad.c_str(), "rb");
+  ASSERT_TRUE(true_vad_fid != NULL) << "Cannot open the true-VAD file " <<
+      FLAGS_true_vad;
+
+  FILE* results_fid = NULL;
+  if (FLAGS_result.size() > 0) {
+    // True if this is the first time writing to this file, in which case a
+    // header is added at the beginning.
+    bool write_header;
+    // Open in read mode. If this fails, the file doesn't exist yet and a
+    // header has to be written. Otherwise no header is needed.
+    results_fid = fopen(FLAGS_result.c_str(), "r");
+    if (results_fid == NULL) {
+      write_header = true;
+    } else {
+      fclose(results_fid);
+      write_header = false;
+    }
+    // Open in append mode.
+    results_fid = fopen(FLAGS_result.c_str(), "a");
+    ASSERT_TRUE(results_fid != NULL) << "Cannot open the file " <<
+        FLAGS_result << " to write the results.";
+    // Write the header if required.
+    if (write_header) {
+      fprintf(results_fid, "%% Total Active, Misdetection, "
+              "Total inactive, False Positive, On-sets, Missed segments, "
+              "Average response\n");
+    }
+  }
+
+  FILE* video_vad_fid = NULL;
+  if (FLAGS_video_vad.size() > 0) {
+    video_vad_fid = fopen(FLAGS_video_vad.c_str(), "rb");
+    ASSERT_TRUE(video_vad_fid != NULL) << "Cannot open the file " <<
+        FLAGS_video_vad << " to read video-based VAD decisions.\n";
+  }
+
+  // AgcStat takes ownership of this file and closes it in its destructor.
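+  // The audio-content values are written as raw binary doubles, one per
+  // processed frame.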
+  FILE* audio_content_fid = NULL;
+  if (FLAGS_audio_content.size() > 0) {
+    audio_content_fid = fopen(FLAGS_audio_content.c_str(), "wb");
+    ASSERT_TRUE(audio_content_fid != NULL) << "Cannot open file " <<
+        FLAGS_audio_content << " to write audio content.\n";
+    agc_stat.set_audio_content_file(audio_content_fid);
+  }
+
+  webrtc::AudioFrame frame;
+  frame.num_channels_ = 1;
+  frame.sample_rate_hz_ = 16000;
+  frame.samples_per_channel_ = frame.sample_rate_hz_ / 100;
+  const size_t kSamplesToRead = frame.num_channels_ *
+      frame.samples_per_channel_;
+
+  agc_stat.SetActivityThreshold(FLAGS_activity_threshold);
+
+  int ret_val = 0;
+  int num_frames = 0;
+  int agc_vad[kMaxNumFrames];
+  uint8_t true_vad[kMaxNumFrames];
+  double p_video = 0.5;
+  int total_active = 0;
+  int total_passive = 0;
+  int total_false_positive = 0;
+  int total_missed_detection = 0;
+  int onset_adaptation = 0;
+  int num_onsets = 0;
+  bool onset = false;
+  uint8_t previous_true_vad = 0;
+  int num_not_adapted = 0;
+  int true_vad_index = 0;
+  bool in_false_positive_region = false;
+  int total_false_positive_duration = 0;
+  bool video_adapted = false;
+  while (kSamplesToRead == fread(frame.data_, sizeof(int16_t),
+                                 kSamplesToRead, pcm_fid)) {
+    assert(true_vad_index < kMaxNumFrames);
+    ASSERT_EQ(1u, fread(&true_vad[true_vad_index], sizeof(*true_vad), 1,
+                        true_vad_fid))
+        << "Size mismatch between true-VAD and the PCM file.\n";
+    if (video_vad_fid != NULL) {
+      ASSERT_EQ(1u, fread(&p_video, sizeof(p_video), 1, video_vad_fid)) <<
+          "Not enough video-based VAD probabilities.";
+    }
+
+    // Negative video activity indicates that the video-based VAD is not yet
+    // adapted. Disregard the learning phase in the statistics.
+    if (p_video < 0) {
+      if (video_adapted) {
+        fprintf(stderr, "Negative video probabilities are ONLY allowed at "
+                "the beginning of the sequence, not in the middle.\n");
+        exit(1);
+      }
+      continue;
+    } else {
+      video_adapted = true;
+    }
+
+    num_frames++;
+    uint8_t last_true_vad;
+    if (true_vad_index == 0) {
+      last_true_vad = previous_true_vad;
+    } else {
+      last_true_vad = true_vad[true_vad_index - 1];
+    }
+    if (last_true_vad == 1 && true_vad[true_vad_index] == 0) {
+      agc_stat.Reset();
+    }
+    true_vad_index++;
+
+    DitherSilence(&frame);
+
+    ret_val = agc_stat.AddAudio(frame, p_video, agc_vad);
+    ASSERT_GE(ret_val, 0);
+
+    if (ret_val > 0) {
+      ASSERT_TRUE(ret_val == true_vad_index);
+      for (int n = 0; n < ret_val; n++) {
+        if (true_vad[n] == 1) {
+          total_active++;
+          if (previous_true_vad == 0) {
+            num_onsets++;
+            onset = true;
+          }
+          if (agc_vad[n] == 0) {
+            total_missed_detection++;
+            if (onset)
+              onset_adaptation++;
+          } else {
+            in_false_positive_region = false;
+            onset = false;
+          }
+        } else if (true_vad[n] == 0) {
+          // Check if the |onset| flag is still up. If so, it means that we
+          // completely missed an active region.
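+          // Such segments are counted in |num_not_adapted| and reported in
+          // the "Missed segments" column of the results.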
+          if (onset)
+            num_not_adapted++;
+          onset = false;
+
+          total_passive++;
+          if (agc_vad[n] == 1) {
+            total_false_positive++;
+            in_false_positive_region = true;
+          }
+          if (in_false_positive_region) {
+            total_false_positive_duration++;
+          }
+        } else {
+          ASSERT_TRUE(false) << "Invalid value for true-VAD.\n";
+        }
+        previous_true_vad = true_vad[n];
+      }
+      true_vad_index = 0;
+    }
+  }
+
+  if (results_fid != NULL) {
+    fprintf(results_fid, "%4d %4d %4d %4d %4d %4d %4.0f %4.0f\n",
+            total_active,
+            total_missed_detection,
+            total_passive,
+            total_false_positive,
+            num_onsets,
+            num_not_adapted,
+            static_cast<float>(onset_adaptation) / (num_onsets + 1e-12),
+            static_cast<float>(total_false_positive_duration) /
+                (total_passive + 1e-12));
+  }
+  fprintf(stdout, "%4d %4d %4d %4d %4d %4d %4.0f %4.0f\n",
+          total_active,
+          total_missed_detection,
+          total_passive,
+          total_false_positive,
+          num_onsets,
+          num_not_adapted,
+          static_cast<float>(onset_adaptation) / (num_onsets + 1e-12),
+          static_cast<float>(total_false_positive_duration) /
+              (total_passive + 1e-12));
+
+  fclose(true_vad_fid);
+  fclose(pcm_fid);
+  if (video_vad_fid != NULL) {
+    fclose(video_vad_fid);
+  }
+  if (results_fid != NULL) {
+    fclose(results_fid);
+  }
+}
+
+}  // namespace webrtc
+
+int main(int argc, char* argv[]) {
+  char kUsage[] =
+      "\nCompute the number of misdetected and false-positive frames. Note\n"
+      " that for each frame of audio (10 ms) there should be one true\n"
+      " activity. If any video-based activity is given, there should also be\n"
+      " one probability per frame.\n"
+      "\nUsage:\n\n"
+      "activity_metric input_pcm [options]\n"
+      "where 'input_pcm' is the input audio sampled at 16 kHz in 16-bit "
+      "format.\n\n";
+  google::SetUsageMessage(kUsage);
+  google::ParseCommandLineFlags(&argc, &argv, true);
+  webrtc::void_main(argc, argv);
+  return 0;
+}
diff --git a/webrtc/modules/audio_processing/agc/test/agc_harness.cc b/webrtc/modules/audio_processing/agc/test/agc_harness.cc
new file mode 100644
index 000000000..d7c32b08d
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/test/agc_harness.cc
@@ -0,0 +1,286 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Refer to kUsage below for a description.
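+//
+// In short: the harness runs one or two VoiceEngine loopback calls to
+// exercise the experimental AGC against the built-in one, either in parallel
+// (panned to opposite stereo channels) or across a local echo path (-aec).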
+
+#include "gflags/gflags.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "webrtc/modules/audio_processing/agc/test/agc_manager.h"
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+#include "webrtc/system_wrappers/interface/sleep.h"
+#include "webrtc/system_wrappers/interface/trace.h"
+#include "webrtc/test/channel_transport/include/channel_transport.h"
+#include "webrtc/test/testsupport/trace_to_stderr.h"
+#include "webrtc/voice_engine/include/voe_audio_processing.h"
+#include "webrtc/voice_engine/include/voe_base.h"
+#include "webrtc/voice_engine/include/voe_codec.h"
+#include "webrtc/voice_engine/include/voe_external_media.h"
+#include "webrtc/voice_engine/include/voe_file.h"
+#include "webrtc/voice_engine/include/voe_hardware.h"
+#include "webrtc/voice_engine/include/voe_network.h"
+#include "webrtc/voice_engine/include/voe_volume_control.h"
+
+DEFINE_bool(codecs, false, "print out available codecs");
+DEFINE_int32(pt, 103, "codec payload type (defaults to ISAC/16000/1)");
+DEFINE_bool(internal, true, "use the internal AGC in 'serial' mode, or as the "
+    "first voice engine's AGC in parallel mode");
+DEFINE_bool(parallel, false, "run internal and public AGCs in parallel, with "
+    "left- and right-panning respectively. Not compatible with -aec.");
+DEFINE_bool(devices, false, "print out capture devices and indexes to be used "
+    "with the capture flags");
+DEFINE_int32(capture1, 0, "capture device index for the first voice engine");
+DEFINE_int32(capture2, 0, "capture device index for the second voice engine");
+DEFINE_int32(render1, 0, "render device index for the first voice engine");
+DEFINE_int32(render2, 0, "render device index for the second voice engine");
+DEFINE_bool(aec, false, "runs two voice engines in parallel, with the first "
+    "playing out a file and sending its captured signal to the second voice "
+    "engine. Also enables echo cancellation.");
+DEFINE_bool(ns, true, "enable noise suppression");
+DEFINE_bool(highpass, true, "enable high-pass filter");
+DEFINE_string(filename, "", "filename for the -aec mode");
+
+namespace webrtc {
+namespace {
+
+const char kUsage[] =
+    "\nWithout additional flags, sets up a simple VoiceEngine loopback call\n"
+    "with the default audio devices and runs forever. The internal AGC is\n"
+    "enabled and the public one disabled.\n\n"
+
+    "It can also run the public AGC in parallel with the internal, panned to\n"
+    "opposite stereo channels on the default render device. The capture\n"
+    "devices for each can be selected (recommended, because otherwise they\n"
+    "will fight for the level on the same device).\n\n"
+
+    "Lastly, it can be used for local AEC testing. In this mode, the first\n"
+    "voice engine plays out a file over the selected render device (normally\n"
+    "loudspeakers) and records from the selected capture device. The second\n"
+    "voice engine receives the capture signal and plays it out over the\n"
+    "selected render device (normally headphones). This allows the user to\n"
+    "test an echo scenario with the first voice engine, while monitoring the\n"
+    "result with the second.";
+
+class AgcVoiceEngine {
+ public:
+  enum Pan {
+    NoPan,
+    PanLeft,
+    PanRight
+  };
+
+  AgcVoiceEngine(bool internal, int tx_port, int rx_port, int capture_idx,
+                 int render_idx)
+      : voe_(VoiceEngine::Create()),
+        base_(VoEBase::GetInterface(voe_)),
+        hardware_(VoEHardware::GetInterface(voe_)),
+        codec_(VoECodec::GetInterface(voe_)),
+        manager_(new AgcManager(voe_)),
+        channel_(-1),
+        capture_idx_(capture_idx),
+        render_idx_(render_idx) {
+    SetUp(internal, tx_port, rx_port);
+  }
+
+  ~AgcVoiceEngine() {
+    TearDown();
+  }
+
+  void SetUp(bool internal, int tx_port, int rx_port) {
+    ASSERT_TRUE(voe_ != NULL);
+    ASSERT_TRUE(base_ != NULL);
+    ASSERT_TRUE(hardware_ != NULL);
+    ASSERT_TRUE(codec_ != NULL);
+    VoEAudioProcessing* audio = VoEAudioProcessing::GetInterface(voe_);
+    ASSERT_TRUE(audio != NULL);
+    VoENetwork* network = VoENetwork::GetInterface(voe_);
+    ASSERT_TRUE(network != NULL);
+
+    ASSERT_EQ(0, base_->Init());
+    channel_ = base_->CreateChannel();
+    ASSERT_NE(-1, channel_);
+
+    channel_transport_.reset(
+        new test::VoiceChannelTransport(network, channel_));
+    ASSERT_EQ(0, channel_transport_->SetSendDestination("127.0.0.1", tx_port));
+    ASSERT_EQ(0, channel_transport_->SetLocalReceiver(rx_port));
+
+    ASSERT_EQ(0, hardware_->SetRecordingDevice(capture_idx_));
+    ASSERT_EQ(0, hardware_->SetPlayoutDevice(render_idx_));
+
+    CodecInst codec_params = {0};
+    bool codec_found = false;
+    for (int i = 0; i < codec_->NumOfCodecs(); i++) {
+      ASSERT_EQ(0, codec_->GetCodec(i, codec_params));
+      if (FLAGS_pt == codec_params.pltype) {
+        codec_found = true;
+        break;
+      }
+    }
+    ASSERT_TRUE(codec_found);
+    ASSERT_EQ(0, codec_->SetSendCodec(channel_, codec_params));
+
+    ASSERT_EQ(0, audio->EnableHighPassFilter(FLAGS_highpass));
+    ASSERT_EQ(0, audio->SetNsStatus(FLAGS_ns));
+    ASSERT_EQ(0, audio->SetEcStatus(FLAGS_aec));
+
+    ASSERT_EQ(0, manager_->Enable(internal));
+    ASSERT_EQ(0, audio->SetAgcStatus(!internal));
+
+    audio->Release();
+    network->Release();
+  }
+
+  void TearDown() {
+    Stop();
+    channel_transport_.reset(NULL);
+    ASSERT_EQ(0, base_->DeleteChannel(channel_));
+    ASSERT_EQ(0, base_->Terminate());
+    // Don't test; the manager hasn't released its interfaces.
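+    // The Release() calls below are therefore deliberately not wrapped in
+    // ASSERTs; |manager_| still holds references at this point.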
+    hardware_->Release();
+    base_->Release();
+    codec_->Release();
+    delete manager_;
+    ASSERT_TRUE(VoiceEngine::Delete(voe_));
+  }
+
+  void PrintDevices() {
+    int num_devices = 0;
+    char device_name[128] = {0};
+    char guid[128] = {0};
+    ASSERT_EQ(0, hardware_->GetNumOfRecordingDevices(num_devices));
+    printf("Capture devices:\n");
+    for (int i = 0; i < num_devices; i++) {
+      ASSERT_EQ(0, hardware_->GetRecordingDeviceName(i, device_name, guid));
+      printf("%d: %s\n", i, device_name);
+    }
+    ASSERT_EQ(0, hardware_->GetNumOfPlayoutDevices(num_devices));
+    printf("Render devices:\n");
+    for (int i = 0; i < num_devices; i++) {
+      ASSERT_EQ(0, hardware_->GetPlayoutDeviceName(i, device_name, guid));
+      printf("%d: %s\n", i, device_name);
+    }
+  }
+
+  void PrintCodecs() {
+    CodecInst params = {0};
+    printf("Codecs:\n");
+    for (int i = 0; i < codec_->NumOfCodecs(); i++) {
+      ASSERT_EQ(0, codec_->GetCodec(i, params));
+      printf("%d %s/%d/%d\n", params.pltype, params.plname, params.plfreq,
+             params.channels);
+    }
+  }
+
+  void StartSending() {
+    ASSERT_EQ(0, base_->StartSend(channel_));
+  }
+
+  void StartPlaying(Pan pan, const std::string& filename) {
+    VoEVolumeControl* volume = VoEVolumeControl::GetInterface(voe_);
+    VoEFile* file = VoEFile::GetInterface(voe_);
+    ASSERT_TRUE(volume != NULL);
+    ASSERT_TRUE(file != NULL);
+    if (pan == PanLeft) {
+      volume->SetOutputVolumePan(channel_, 1, 0);
+    } else if (pan == PanRight) {
+      volume->SetOutputVolumePan(channel_, 0, 1);
+    }
+    if (filename != "") {
+      printf("playing file\n");
+      ASSERT_EQ(0, file->StartPlayingFileLocally(channel_, filename.c_str(),
+                true, kFileFormatPcm16kHzFile, 1.0, 0, 0));
+    }
+    ASSERT_EQ(0, base_->StartReceive(channel_));
+    ASSERT_EQ(0, base_->StartPlayout(channel_));
+    volume->Release();
+    file->Release();
+  }
+
+  void Stop() {
+    ASSERT_EQ(0, base_->StopSend(channel_));
+    ASSERT_EQ(0, base_->StopPlayout(channel_));
+  }
+
+ private:
+  VoiceEngine* voe_;
+  VoEBase* base_;
+  VoEHardware* hardware_;
+  VoECodec* codec_;
+  AgcManager* manager_;
+  int channel_;
+  int capture_idx_;
+  int render_idx_;
+  scoped_ptr<test::VoiceChannelTransport> channel_transport_;
+};
+
+void RunHarness() {
+  scoped_ptr<AgcVoiceEngine> voe1(new AgcVoiceEngine(FLAGS_internal,
+                                                     2000,
+                                                     2000,
+                                                     FLAGS_capture1,
+                                                     FLAGS_render1));
+  scoped_ptr<AgcVoiceEngine> voe2;
+  if (FLAGS_parallel) {
+    voe2.reset(new AgcVoiceEngine(!FLAGS_internal, 3000, 3000, FLAGS_capture2,
+                                  FLAGS_render2));
+    voe1->StartPlaying(AgcVoiceEngine::PanLeft, "");
+    voe1->StartSending();
+    voe2->StartPlaying(AgcVoiceEngine::PanRight, "");
+    voe2->StartSending();
+  } else if (FLAGS_aec) {
+    voe1.reset(new AgcVoiceEngine(FLAGS_internal, 2000, 4242, FLAGS_capture1,
+                                  FLAGS_render1));
+    voe2.reset(new AgcVoiceEngine(!FLAGS_internal, 4242, 2000, FLAGS_capture2,
+                                  FLAGS_render2));
+    voe1->StartPlaying(AgcVoiceEngine::NoPan, FLAGS_filename);
+    voe1->StartSending();
+    voe2->StartPlaying(AgcVoiceEngine::NoPan, "");
+  } else {
+    voe1->StartPlaying(AgcVoiceEngine::NoPan, "");
+    voe1->StartSending();
+  }
+
+  // Run forever...
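+  // (0x7fffffff ms is roughly 24.8 days, i.e. effectively until the process
+  // is killed.)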
+  SleepMs(0x7fffffff);
+}
+
+void PrintDevices() {
+  AgcVoiceEngine device_voe(false, 4242, 4242, 0, 0);
+  device_voe.PrintDevices();
+}
+
+void PrintCodecs() {
+  AgcVoiceEngine codec_voe(false, 4242, 4242, 0, 0);
+  codec_voe.PrintCodecs();
+}
+
+}  // namespace
+}  // namespace webrtc
+
+int main(int argc, char** argv) {
+  google::SetUsageMessage(webrtc::kUsage);
+  google::ParseCommandLineFlags(&argc, &argv, true);
+  webrtc::test::TraceToStderr trace_to_stderr;
+
+  if (FLAGS_parallel && FLAGS_aec) {
+    printf("-parallel and -aec are not compatible\n");
+    return 1;
+  }
+  if (FLAGS_devices) {
+    webrtc::PrintDevices();
+  }
+  if (FLAGS_codecs) {
+    webrtc::PrintCodecs();
+  }
+  if (!FLAGS_devices && !FLAGS_codecs) {
+    webrtc::RunHarness();
+  }
+  return 0;
+}
diff --git a/webrtc/modules/audio_processing/agc/test/agc_manager.cc b/webrtc/modules/audio_processing/agc/test/agc_manager.cc
new file mode 100644
index 000000000..a741e6408
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/test/agc_manager.cc
@@ -0,0 +1,252 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/agc/test/agc_manager.h"
+
+#include <assert.h>
+
+#include "webrtc/modules/audio_processing/agc/agc.h"
+#include "webrtc/modules/audio_processing/include/audio_processing.h"
+#include "webrtc/modules/interface/module_common_types.h"
+#include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
+#include "webrtc/system_wrappers/interface/logging.h"
+#include "webrtc/voice_engine/include/voe_external_media.h"
+#include "webrtc/voice_engine/include/voe_volume_control.h"
+
+namespace webrtc {
+
+class AgcManagerVolume : public VolumeCallbacks {
+ public:
+  // AgcManagerVolume acquires ownership of |volume|.
+  explicit AgcManagerVolume(VoEVolumeControl* volume)
+      : volume_(volume) {
+  }
+
+  ~AgcManagerVolume() {
+    if (volume_) {
+      volume_->Release();
+    }
+  }
+
+  virtual void SetMicVolume(int volume) {
+    if (volume_->SetMicVolume(volume) != 0) {
+      LOG_FERR1(LS_WARNING, SetMicVolume, volume);
+    }
+  }
+
+  int GetMicVolume() {
+    unsigned int volume = 0;
+    if (volume_->GetMicVolume(volume) != 0) {
+      LOG_FERR0(LS_WARNING, GetMicVolume);
+      return -1;
+    }
+    return volume;
+  }
+
+ private:
+  VoEVolumeControl* volume_;
+};
+
+class MediaCallback : public VoEMediaProcess {
+ public:
+  MediaCallback(AgcManagerDirect* direct, AudioProcessing* audioproc,
+                CriticalSectionWrapper* crit)
+      : direct_(direct),
+        audioproc_(audioproc),
+        crit_(crit),
+        frame_() {
+  }
+
+ protected:
+  virtual void Process(const int channel, const ProcessingTypes type,
+                       int16_t audio[], const int samples_per_channel,
+                       const int sample_rate_hz, const bool is_stereo) {
+    CriticalSectionScoped cs(crit_);
+    if (direct_->capture_muted()) {
+      return;
+    }
+
+    // Extract the first channel.
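+    // The AGC analysis runs on a mono stream; for stereo input only the left
+    // (first) sample of each interleaved pair is used.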
+    const int kMaxSampleRateHz = 48000;
+    const int kMaxSamplesPerChannel = kMaxSampleRateHz / 100;
+    assert(samples_per_channel <= kMaxSamplesPerChannel &&
+           sample_rate_hz <= kMaxSampleRateHz);
+    int16_t mono[kMaxSamplesPerChannel];
+    int16_t* mono_ptr = audio;
+    if (is_stereo) {
+      for (int n = 0; n < samples_per_channel; n++) {
+        mono[n] = audio[n * 2];
+      }
+      mono_ptr = mono;
+    }
+
+    direct_->Process(mono_ptr, samples_per_channel, sample_rate_hz);
+
+    // TODO(ajm): It's unfortunate we have to memcpy to this frame here, but
+    // it's needed for use with AudioProcessing.
+    frame_.num_channels_ = is_stereo ? 2 : 1;
+    frame_.samples_per_channel_ = samples_per_channel;
+    frame_.sample_rate_hz_ = sample_rate_hz;
+    const int length_samples = frame_.num_channels_ * samples_per_channel;
+    memcpy(frame_.data_, audio, length_samples * sizeof(int16_t));
+
+    // Apply compression to the audio.
+    if (audioproc_->ProcessStream(&frame_) != 0) {
+      LOG_FERR0(LS_ERROR, ProcessStream);
+    }
+
+    // Copy the compressed audio back to voice engine's array.
+    memcpy(audio, frame_.data_, length_samples * sizeof(int16_t));
+  }
+
+ private:
+  AgcManagerDirect* direct_;
+  AudioProcessing* audioproc_;
+  CriticalSectionWrapper* crit_;
+  AudioFrame frame_;
+};
+
+class PreprocCallback : public VoEMediaProcess {
+ public:
+  PreprocCallback(AgcManagerDirect* direct, CriticalSectionWrapper* crit)
+      : direct_(direct),
+        crit_(crit) {
+  }
+
+ protected:
+  virtual void Process(const int channel, const ProcessingTypes type,
+                       int16_t audio[], const int samples_per_channel,
+                       const int sample_rate_hz, const bool is_stereo) {
+    CriticalSectionScoped cs(crit_);
+    if (direct_->capture_muted()) {
+      return;
+    }
+    direct_->AnalyzePreProcess(audio, is_stereo ? 2 : 1, samples_per_channel);
+  }
+
+ private:
+  AgcManagerDirect* direct_;
+  CriticalSectionWrapper* crit_;
+};
+
+AgcManager::AgcManager(VoiceEngine* voe)
+    : media_(VoEExternalMedia::GetInterface(voe)),
+      volume_callbacks_(new AgcManagerVolume(VoEVolumeControl::GetInterface(
+          voe))),
+      crit_(CriticalSectionWrapper::CreateCriticalSection()),
+      enabled_(false),
+      initialized_(false) {
+  Config config;
+  config.Set<ExperimentalAgc>(new ExperimentalAgc(false));
+  audioproc_.reset(AudioProcessing::Create(config));
+  direct_.reset(new AgcManagerDirect(audioproc_->gain_control(),
+                                     volume_callbacks_.get()));
+  media_callback_.reset(new MediaCallback(direct_.get(),
+                                          audioproc_.get(),
+                                          crit_.get()));
+  preproc_callback_.reset(new PreprocCallback(direct_.get(), crit_.get()));
+}
+
+AgcManager::AgcManager(VoEExternalMedia* media, VoEVolumeControl* volume,
+                       Agc* agc, AudioProcessing* audioproc)
+    : media_(media),
+      volume_callbacks_(new AgcManagerVolume(volume)),
+      crit_(CriticalSectionWrapper::CreateCriticalSection()),
+      audioproc_(audioproc),
+      direct_(new AgcManagerDirect(agc,
+                                   audioproc_->gain_control(),
+                                   volume_callbacks_.get())),
+      media_callback_(new MediaCallback(direct_.get(),
+                                        audioproc_.get(),
+                                        crit_.get())),
+      preproc_callback_(new PreprocCallback(direct_.get(), crit_.get())),
+      enabled_(false),
+      initialized_(false) {
+}
+
+AgcManager::AgcManager()
+    : media_(NULL),
+      enabled_(false),
+      initialized_(false) {
+}
+
+AgcManager::~AgcManager() {
+  if (media_) {
+    if (enabled_) {
+      DeregisterCallbacks();
+    }
+    media_->Release();
+  }
+}
+
+int AgcManager::Enable(bool enable) {
+  if (enable == enabled_) {
+    return 0;
+  }
+  if (!initialized_) {
+    CriticalSectionScoped cs(crit_.get());
+    if (audioproc_->gain_control()->Enable(true) != 0) {
+      LOG_FERR1(LS_ERROR, gain_control()->Enable, true);
+      return -1;
+    }
+    if (direct_->Initialize() != 0) {
+      assert(false);
+      return -1;
+    }
+    initialized_ = true;
+  }
+
+  if (enable) {
+    if (media_->RegisterExternalMediaProcessing(0, kRecordingAllChannelsMixed,
+                                                *media_callback_) != 0) {
+      LOG(LS_ERROR) << "Failed to register postproc callback";
+      return -1;
+    }
+    if (media_->RegisterExternalMediaProcessing(0, kRecordingPreprocessing,
+                                                *preproc_callback_) != 0) {
+      LOG(LS_ERROR) << "Failed to register preproc callback";
+      return -1;
+    }
+  } else {
+    if (DeregisterCallbacks() != 0)
+      return -1;
+  }
+  enabled_ = enable;
+  return 0;
+}
+
+void AgcManager::CaptureDeviceChanged() {
+  CriticalSectionScoped cs(crit_.get());
+  direct_->Initialize();
+}
+
+void AgcManager::SetCaptureMuted(bool muted) {
+  CriticalSectionScoped cs(crit_.get());
+  direct_->SetCaptureMuted(muted);
+}
+
+int AgcManager::DeregisterCallbacks() {
+  // DeRegister shares a lock with the Process() callback. This call will
+  // block until the callback has finished, after which it's safe to continue
+  // the teardown.
+  int err = 0;
+  if (media_->DeRegisterExternalMediaProcessing(0,
+          kRecordingAllChannelsMixed) != 0) {
+    LOG(LS_ERROR) << "Failed to deregister postproc callback";
+    err = -1;
+  }
+  if (media_->DeRegisterExternalMediaProcessing(0,
+          kRecordingPreprocessing) != 0) {
+    LOG(LS_ERROR) << "Failed to deregister preproc callback";
+    err = -1;
+  }
+  return err;
+}
+
+}  // namespace webrtc
diff --git a/webrtc/modules/audio_processing/agc/test/agc_manager.h b/webrtc/modules/audio_processing/agc/test/agc_manager.h
new file mode 100644
index 000000000..ec8161cdb
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/test/agc_manager.h
@@ -0,0 +1,81 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_TEST_AGC_MANAGER_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_TEST_AGC_MANAGER_H_
+
+#include "webrtc/modules/audio_processing/agc/agc_manager_direct.h"
+#include "webrtc/system_wrappers/interface/scoped_ptr.h"
+
+namespace webrtc {
+
+class Agc;
+class AudioProcessing;
+class CriticalSectionWrapper;
+class MediaCallback;
+class PreprocCallback;
+class VoEExternalMedia;
+class VoEVolumeControl;
+class VoiceEngine;
+class VolumeCallbacks;
+
+// Handles the interaction between VoiceEngine and the internal AGC. It hooks
+// into the capture stream through VoiceEngine's external media interface and
+// sends the audio to the AGC for analysis. It forwards requests for a capture
+// volume change from the AGC to the VoiceEngine volume interface.
+class AgcManager {
+ public:
+  explicit AgcManager(VoiceEngine* voe);
+  // Dependency injection for testing. Don't delete |agc| or |audioproc|, as
+  // their memory is owned by the manager. If |media| or |volume| are non-fake
+  // reference-counted classes, don't release them, as this is handled by the
+  // manager.
+  AgcManager(VoEExternalMedia* media, VoEVolumeControl* volume, Agc* agc,
+             AudioProcessing* audioproc);
+  virtual ~AgcManager();
+
+  // When enabled, registers external media processing callbacks with
+  // VoiceEngine to hook into the capture stream. Disabling deregisters the
+  // callbacks.
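+  // Returns 0 on success and -1 on failure. The first call also lazily
+  // initializes the AGC and the AudioProcessing gain control.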
+  virtual int Enable(bool enable);
+  virtual bool enabled() const { return enabled_; }
+
+  // Call when the capture device has changed. This will trigger a retrieval
+  // of the initial capture volume on the next audio frame.
+  virtual void CaptureDeviceChanged();
+
+  // Call when the capture stream has been muted/unmuted. This causes the
+  // manager to disregard all incoming audio; chances are good that it's
+  // background noise to which we'd like to avoid adapting.
+  virtual void SetCaptureMuted(bool muted);
+  virtual bool capture_muted() const { return direct_->capture_muted(); }
+
+ protected:
+  // Provide a default constructor for testing.
+  AgcManager();
+
+ private:
+  int DeregisterCallbacks();
+  int CheckVolumeAndReset();
+
+  VoEExternalMedia* media_;
+  scoped_ptr<VolumeCallbacks> volume_callbacks_;
+  scoped_ptr<CriticalSectionWrapper> crit_;
+  scoped_ptr<AudioProcessing> audioproc_;
+  scoped_ptr<AgcManagerDirect> direct_;
+  scoped_ptr<MediaCallback> media_callback_;
+  scoped_ptr<PreprocCallback> preproc_callback_;
+  bool enabled_;
+  bool initialized_;
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_TEST_AGC_MANAGER_H_
diff --git a/webrtc/modules/audio_processing/agc/test/agc_test.cc b/webrtc/modules/audio_processing/agc/test/agc_test.cc
new file mode 100644
index 000000000..d855f42f2
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/test/agc_test.cc
@@ -0,0 +1,155 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+#include <stdio.h>
+
+#include <algorithm>
+
+#include "gflags/gflags.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "webrtc/modules/audio_processing/agc/agc.h"
+#include "webrtc/modules/audio_processing/agc/test/agc_manager.h"
+#include "webrtc/modules/audio_processing/agc/test/test_utils.h"
+#include "webrtc/modules/audio_processing/agc/utility.h"
+#include "webrtc/modules/audio_processing/include/audio_processing.h"
+#include "webrtc/modules/interface/module_common_types.h"
+#include "webrtc/system_wrappers/interface/logging.h"
+#include "webrtc/test/testsupport/trace_to_stderr.h"
+#include "webrtc/voice_engine/include/mock/fake_voe_external_media.h"
+#include "webrtc/voice_engine/include/mock/mock_voe_volume_control.h"
+
+DEFINE_string(in, "in.pcm", "input filename");
+DEFINE_string(out, "out.pcm", "output filename");
+DEFINE_int32(rate, 16000, "sample rate in Hz");
+DEFINE_int32(channels, 1, "number of channels");
+DEFINE_int32(level, -18, "target level in RMS dBFs [-100, 0]");
+DEFINE_bool(limiter, true, "enable a limiter for the compression stage");
+DEFINE_int32(cmp_level, 2, "target level in dBFs for the compression stage");
+DEFINE_int32(mic_gain, 80, "range of gain provided by the virtual mic in dB");
+DEFINE_int32(gain_offset, 0,
+             "an amount (in dB) to add to every entry in the gain map");
+DEFINE_string(gain_file, "",
+    "filename providing a mic gain mapping. The file should be text containing "
+    "a (floating-point) gain entry in dBFs per line corresponding to levels "
+    "from 0 to 255.");
+
+using ::testing::_;
+using ::testing::ByRef;
+using ::testing::DoAll;
+using ::testing::Mock;
+using ::testing::Return;
+using ::testing::SaveArg;
+using ::testing::SetArgReferee;
+
+namespace webrtc {
+namespace {
+
+const char kUsage[] = "\nProcess an audio file to simulate an analog AGC.";
+
+void ReadGainMapFromFile(FILE* file, int offset, int gain_map[256]) {
+  for (int i = 0; i < 256; ++i) {
+    float gain = 0;
+    ASSERT_EQ(1, fscanf(file, "%f", &gain));
+    gain_map[i] = std::floor(gain + 0.5);
+  }
+
+  // Adjust from dBFs to gain in dB. We assume that level 127 provides 0 dB
+  // gain. This corresponds to the interpretation in MicLevel2Gain().
+  const int midpoint = gain_map[127];
+  printf("Gain map\n");
+  for (int i = 0; i < 256; ++i) {
+    gain_map[i] += offset - midpoint;
+    if (i % 5 == 0) {
+      printf("%d: %d dB\n", i, gain_map[i]);
+    }
+  }
+}
+
+void CalculateGainMap(int gain_range_db, int offset, int gain_map[256]) {
+  printf("Gain map\n");
+  for (int i = 0; i < 256; ++i) {
+    gain_map[i] = std::floor(MicLevel2Gain(gain_range_db, i) + 0.5) + offset;
+    if (i % 5 == 0) {
+      printf("%d: %d dB\n", i, gain_map[i]);
+    }
+  }
+}
+
+void RunAgc() {
+  test::TraceToStderr trace_to_stderr(true);
+  FILE* in_file = fopen(FLAGS_in.c_str(), "rb");
+  ASSERT_TRUE(in_file != NULL);
+  FILE* out_file = fopen(FLAGS_out.c_str(), "wb");
+  ASSERT_TRUE(out_file != NULL);
+
+  int gain_map[256];
+  if (FLAGS_gain_file != "") {
+    FILE* gain_file = fopen(FLAGS_gain_file.c_str(), "rt");
+    ASSERT_TRUE(gain_file != NULL);
+    ReadGainMapFromFile(gain_file, FLAGS_gain_offset, gain_map);
+    fclose(gain_file);
+  } else {
+    CalculateGainMap(FLAGS_mic_gain, FLAGS_gain_offset, gain_map);
+  }
+
+  FakeVoEExternalMedia media;
+  MockVoEVolumeControl volume;
+  Agc* agc = new Agc;
+  AudioProcessing* audioproc = AudioProcessing::Create(0);
+  ASSERT_TRUE(audioproc != NULL);
+  AgcManager manager(&media, &volume, agc, audioproc);
+
+  int mic_level = 128;
+  int last_mic_level = mic_level;
+  EXPECT_CALL(volume, GetMicVolume(_))
+      .WillRepeatedly(DoAll(SetArgReferee<0>(ByRef(mic_level)), Return(0)));
+  EXPECT_CALL(volume, SetMicVolume(_))
+      .WillRepeatedly(DoAll(SaveArg<0>(&mic_level), Return(0)));
+
+  manager.Enable(true);
+  ASSERT_EQ(0, agc->set_target_level_dbfs(FLAGS_level));
+  const AudioProcessing::Error kNoErr = AudioProcessing::kNoError;
+  GainControl* gctrl = audioproc->gain_control();
+  ASSERT_EQ(kNoErr, gctrl->set_target_level_dbfs(FLAGS_cmp_level));
+  ASSERT_EQ(kNoErr, gctrl->enable_limiter(FLAGS_limiter));
+
+  AudioFrame frame;
+  frame.num_channels_ = FLAGS_channels;
+  frame.sample_rate_hz_ = FLAGS_rate;
+  frame.samples_per_channel_ = FLAGS_rate / 100;
+  const size_t frame_length = frame.samples_per_channel_ * FLAGS_channels;
+  size_t sample_count = 0;
+  while (fread(frame.data_, sizeof(int16_t), frame_length, in_file) ==
+         frame_length) {
+    SimulateMic(gain_map, mic_level, last_mic_level, &frame);
+    last_mic_level = mic_level;
+    media.CallProcess(kRecordingAllChannelsMixed, frame.data_,
+                      frame.samples_per_channel_, FLAGS_rate, FLAGS_channels);
+    ASSERT_EQ(frame_length,
+              fwrite(frame.data_, sizeof(int16_t), frame_length, out_file));
+    sample_count += frame_length;
+    trace_to_stderr.SetTimeSeconds(static_cast<float>(sample_count) /
+                                   FLAGS_channels / FLAGS_rate);
+  }
+  fclose(in_file);
+  fclose(out_file);
+  EXPECT_CALL(volume, Release());
+}
+
+}  // namespace
+}  // namespace webrtc
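+
+// The mocked VoEVolumeControl above closes the simulation loop: SetMicVolume()
+// calls from the manager are captured into |mic_level|, which SimulateMic()
+// then applies to the next frame through the gain map.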
+
+int main(int argc, char* argv[]) {
+  google::SetUsageMessage(webrtc::kUsage);
+  google::ParseCommandLineFlags(&argc, &argv, true);
+  webrtc::RunAgc();
+  return 0;
+}
diff --git a/webrtc/tools/tools.gyp b/webrtc/tools/tools.gyp
index 102ba8ec3..edac118e4 100644
--- a/webrtc/tools/tools.gyp
+++ b/webrtc/tools/tools.gyp
@@ -101,6 +101,62 @@
   'conditions': [
     ['include_tests==1', {
       'targets' : [
+        {
+          'target_name': 'agc_manager',
+          'type': 'static_library',
+          'dependencies': [
+            '<(webrtc_root)/common_audio/common_audio.gyp:common_audio',
+            '<(webrtc_root)/modules/modules.gyp:audio_processing',
+            '<(webrtc_root)/voice_engine/voice_engine.gyp:voice_engine',
+          ],
+          'sources': [
+            '<(webrtc_root)/modules/audio_processing/agc/test/agc_manager.cc',
+            '<(webrtc_root)/modules/audio_processing/agc/test/agc_manager.h',
+          ],
+        },
+        {
+          'target_name': 'agc_harness',
+          'type': 'executable',
+          'dependencies': [
+            '<(DEPTH)/testing/gtest.gyp:gtest',
+            '<(DEPTH)/third_party/gflags/gflags.gyp:gflags',
+            '<(webrtc_root)/system_wrappers/source/system_wrappers.gyp:system_wrappers_default',
+            '<(webrtc_root)/test/test.gyp:channel_transport',
+            '<(webrtc_root)/test/test.gyp:test_support',
+            'agc_manager',
+          ],
+          'sources': [
+            '<(webrtc_root)/modules/audio_processing/agc/test/agc_harness.cc',
+          ],
+        },  # agc_harness
+        {
+          'target_name': 'agc_proc',
+          'type': 'executable',
+          'dependencies': [
+            '<(DEPTH)/testing/gmock.gyp:gmock',
+            '<(DEPTH)/testing/gtest.gyp:gtest',
+            '<(DEPTH)/third_party/gflags/gflags.gyp:gflags',
+            '<(webrtc_root)/test/test.gyp:test_support',
+            '<(webrtc_root)/system_wrappers/source/system_wrappers.gyp:system_wrappers_default',
+            'agc_manager',
+          ],
+          'sources': [
+            '<(webrtc_root)/modules/audio_processing/agc/test/agc_test.cc',
+            '<(webrtc_root)/modules/audio_processing/agc/test/test_utils.cc',
+          ],
+        },  # agc_proc
+        {
+          'target_name': 'activity_metric',
+          'type': 'executable',
+          'dependencies': [
+            '<(DEPTH)/testing/gtest.gyp:gtest',
+            '<(DEPTH)/third_party/gflags/gflags.gyp:gflags',
+            'agc_manager',
+          ],
+          'sources': [
+            '<(webrtc_root)/modules/audio_processing/agc/test/activity_metric.cc',
+          ],
+        },  # activity_metric
         {
           'target_name': 'audio_e2e_harness',
           'type': 'executable',
diff --git a/webrtc/webrtc.gyp b/webrtc/webrtc.gyp
index f9ed8838f..91bde7e1e 100644
--- a/webrtc/webrtc.gyp
+++ b/webrtc/webrtc.gyp
@@ -85,13 +85,13 @@
       ],
       'conditions': [
         # TODO(andresp): Chromium libpeerconnection should link directly with
-        # this and no if conditions should be needed on webrtc build files.
+        # this and no if conditions should be needed on webrtc build files.
         ['build_with_chromium==1', {
-          'dependencies': [
-            '<(webrtc_root)/modules/modules.gyp:video_capture_module_impl',
-            '<(webrtc_root)/modules/modules.gyp:video_render_module_impl',
-          ],
-        }],
+          'dependencies': [
+            '<(webrtc_root)/modules/modules.gyp:video_capture_module_impl',
+            '<(webrtc_root)/modules/modules.gyp:video_render_module_impl',
+          ],
+        }],
       ],
     },
  ],