diff --git a/data/audio_processing/output_data_float.pb b/data/audio_processing/output_data_float.pb
index b2b934dd8..cadc1d300 100644
Binary files a/data/audio_processing/output_data_float.pb and b/data/audio_processing/output_data_float.pb differ
diff --git a/src/modules/audio_processing/include/audio_processing.h b/src/modules/audio_processing/include/audio_processing.h
index 1fbd0b4d7..75b3e2073 100644
--- a/src/modules/audio_processing/include/audio_processing.h
+++ b/src/modules/audio_processing/include/audio_processing.h
@@ -545,6 +545,11 @@ class NoiseSuppression {
   virtual int set_level(Level level) = 0;
   virtual Level level() const = 0;
 
+  // Returns the internally computed prior speech probability of the current
+  // frame averaged over output channels. This is not supported in fixed point,
+  // for which |kUnsupportedFunctionError| (as a float) is returned.
+  virtual float speech_probability() const = 0;
+
  protected:
   virtual ~NoiseSuppression() {};
 };
diff --git a/src/modules/audio_processing/noise_suppression_impl.cc b/src/modules/audio_processing/noise_suppression_impl.cc
index c44d3fedd..d6162e638 100644
--- a/src/modules/audio_processing/noise_suppression_impl.cc
+++ b/src/modules/audio_processing/noise_suppression_impl.cc
@@ -110,6 +110,28 @@ NoiseSuppression::Level NoiseSuppressionImpl::level() const {
   return level_;
 }
 
+// Returns the prior speech probability of the current frame, averaged over the
+// handles of this component. In the fixed point build this is unsupported and
+// |kUnsupportedFunctionError| is returned instead, converted to float.
+float NoiseSuppressionImpl::speech_probability() const {
+#if defined(WEBRTC_NS_FLOAT)
+  const int handle_count = num_handles();
+  if (handle_count <= 0) {
+    // No handles to average over; avoid a 0/0 division, which yields NaN.
+    return 0.0f;
+  }
+  float probability_sum = 0.0f;
+  for (int i = 0; i < handle_count; i++) {
+    Handle* my_handle = static_cast<Handle*>(handle(i));
+    probability_sum += WebRtcNs_prior_speech_probability(my_handle);
+  }
+  return probability_sum / handle_count;
+#elif defined(WEBRTC_NS_FIXED)
+  // Currently not available for the fixed point implementation.
+  return static_cast<float>(apm_->kUnsupportedFunctionError);
+#endif
+}
+
 void* NoiseSuppressionImpl::CreateHandle() const {
   Handle* handle = NULL;
 #if defined(WEBRTC_NS_FLOAT)
diff --git a/src/modules/audio_processing/noise_suppression_impl.h b/src/modules/audio_processing/noise_suppression_impl.h
index 7b65b7083..73a23228a 100644
--- a/src/modules/audio_processing/noise_suppression_impl.h
+++ b/src/modules/audio_processing/noise_suppression_impl.h
@@ -28,6 +28,7 @@ class NoiseSuppressionImpl : public NoiseSuppression,
 
   // NoiseSuppression implementation.
   virtual bool is_enabled() const;
+  virtual float speech_probability() const;
 
  private:
   // NoiseSuppression implementation.
diff --git a/src/modules/audio_processing/ns/include/noise_suppression.h b/src/modules/audio_processing/ns/include/noise_suppression.h
index 1f498c1cd..c9a8e3234 100644
--- a/src/modules/audio_processing/ns/include/noise_suppression.h
+++ b/src/modules/audio_processing/ns/include/noise_suppression.h
@@ -20,14 +20,14 @@ extern "C" {
 #endif
 
 /*
- * This function creates an instance to the noise reduction structure
+ * This function creates an instance to the noise suppression structure
  *
  * Input:
- *      - NS_inst       : Pointer to noise reduction instance that should be
+ *      - NS_inst       : Pointer to noise suppression instance that should be
  *                        created
  *
  * Output:
- *      - NS_inst       : Pointer to created noise reduction instance
+ *      - NS_inst       : Pointer to created noise suppression instance
  *
  * Return value         :  0 - Ok
  *                        -1 - Error
@@ -36,7 +36,7 @@ int WebRtcNs_Create(NsHandle** NS_inst);
 
 
 /*
- * This function frees the dynamic memory of a specified Noise Reduction
+ * This function frees the dynamic memory of a specified noise suppression
  * instance.
  *
  * Input:
@@ -49,7 +49,8 @@ int WebRtcNs_Free(NsHandle* NS_inst);
 
 
 /*
- * This function initializes a NS instance
+ * This function initializes a NS instance and has to be called before any other
+ * processing is made.
* * Input: * - NS_inst : Instance that should be initialized @@ -67,11 +68,11 @@ int WebRtcNs_Init(NsHandle* NS_inst, WebRtc_UWord32 fs); * This changes the aggressiveness of the noise suppression method. * * Input: - * - NS_inst : Instance that should be initialized + * - NS_inst : Noise suppression instance. * - mode : 0: Mild, 1: Medium , 2: Aggressive * * Output: - * - NS_inst : Initialized instance + * - NS_inst : Updated instance. * * Return value : 0 - Ok * -1 - Error @@ -84,7 +85,7 @@ int WebRtcNs_set_policy(NsHandle* NS_inst, int mode); * input and output signals should always be 10ms (80 or 160 samples). * * Input - * - NS_inst : NS Instance. Needs to be initiated before call. + * - NS_inst : Noise suppression instance. * - spframe : Pointer to speech frame buffer for L band * - spframe_H : Pointer to speech frame buffer for H band * - fs : sampling frequency @@ -103,6 +104,18 @@ int WebRtcNs_Process(NsHandle* NS_inst, short* outframe, short* outframe_H); +/* Returns the internally used prior speech probability of the current frame. + * There is a frequency bin based one as well, with which this should not be + * confused. + * + * Input + * - handle : Noise suppression instance. + * + * Return value : Prior speech probability in interval [0.0, 1.0]. + * -1 - NULL pointer or uninitialized instance. 
+ */ +float WebRtcNs_prior_speech_probability(NsHandle* handle); + #ifdef __cplusplus } #endif diff --git a/src/modules/audio_processing/ns/noise_suppression.c b/src/modules/audio_processing/ns/noise_suppression.c index a1e5ae8bd..6684b8286 100644 --- a/src/modules/audio_processing/ns/noise_suppression.c +++ b/src/modules/audio_processing/ns/noise_suppression.c @@ -46,3 +46,14 @@ int WebRtcNs_Process(NsHandle* NS_inst, short* spframe, short* spframe_H, return WebRtcNs_ProcessCore( (NSinst_t*) NS_inst, spframe, spframe_H, outframe, outframe_H); } + +float WebRtcNs_prior_speech_probability(NsHandle* handle) { + NSinst_t* self = (NSinst_t*) handle; + if (handle == NULL) { + return -1; + } + if (self->initFlag == 0) { + return -1; + } + return self->priorSpeechProb; +} diff --git a/src/modules/audio_processing/test/process_test.cc b/src/modules/audio_processing/test/process_test.cc index 0a49c13d5..aa432ff3b 100644 --- a/src/modules/audio_processing/test/process_test.cc +++ b/src/modules/audio_processing/test/process_test.cc @@ -115,6 +115,7 @@ void usage() { printf(" --ns_moderate\n"); printf(" --ns_high\n"); printf(" --ns_very_high\n"); + printf(" --ns_prob_file FILE\n"); printf("\n -vad Voice activity detection\n"); printf(" --vad_out_file FILE\n"); printf("\n Level metrics (enabled by default)\n"); @@ -149,6 +150,7 @@ void void_main(int argc, char* argv[]) { const char* near_filename = NULL; const char* out_filename = NULL; const char* vad_out_filename = NULL; + const char* ns_prob_filename = NULL; const char* aecm_echo_path_in_filename = NULL; const char* aecm_echo_path_out_filename = NULL; @@ -336,6 +338,11 @@ void void_main(int argc, char* argv[]) { ASSERT_EQ(apm->kNoError, apm->noise_suppression()->set_level(NoiseSuppression::kVeryHigh)); + } else if (strcmp(argv[i], "--ns_prob_file") == 0) { + i++; + ASSERT_LT(i, argc) << "Specify filename after --ns_prob_file"; + ns_prob_filename = argv[i]; + } else if (strcmp(argv[i], "-vad") == 0) { ASSERT_EQ(apm->kNoError, 
apm->voice_detection()->Enable(true)); @@ -390,6 +397,7 @@ void void_main(int argc, char* argv[]) { const char delay_filename[] = "apm_delay.dat"; const char drift_filename[] = "apm_drift.dat"; const char vad_file_default[] = "vad_out.dat"; + const char ns_prob_file_default[] = "ns_prob.dat"; if (!simulating) { far_filename = far_file_default; @@ -404,6 +412,10 @@ void void_main(int argc, char* argv[]) { vad_out_filename = vad_file_default; } + if (!ns_prob_filename) { + ns_prob_filename = ns_prob_file_default; + } + FILE* pb_file = NULL; FILE* far_file = NULL; FILE* near_file = NULL; @@ -412,6 +424,7 @@ void void_main(int argc, char* argv[]) { FILE* delay_file = NULL; FILE* drift_file = NULL; FILE* vad_out_file = NULL; + FILE* ns_prob_file = NULL; FILE* aecm_echo_path_in_file = NULL; FILE* aecm_echo_path_out_file = NULL; @@ -466,6 +479,12 @@ void void_main(int argc, char* argv[]) { << vad_out_file; } + if (apm->noise_suppression()->is_enabled()) { + ns_prob_file = fopen(ns_prob_filename, "wb"); + ASSERT_TRUE(NULL != ns_prob_file) << "Unable to open NS output file " + << ns_prob_file; + } + if (aecm_echo_path_in_filename != NULL) { aecm_echo_path_in_file = fopen(aecm_echo_path_in_filename, "rb"); ASSERT_TRUE(NULL != aecm_echo_path_in_file) << "Unable to open file " @@ -504,6 +523,7 @@ void void_main(int argc, char* argv[]) { int drift_samples = 0; int capture_level = 127; int8_t stream_has_voice = 0; + float ns_speech_prob = 0.0f; TickTime t0 = TickTime::Now(); TickTime t1 = t0; @@ -643,6 +663,14 @@ void void_main(int argc, char* argv[]) { vad_out_file)); } + if (ns_prob_file != NULL) { + ns_speech_prob = apm->noise_suppression()->speech_probability(); + ASSERT_EQ(1u, fwrite(&ns_speech_prob, + sizeof(ns_speech_prob), + 1, + ns_prob_file)); + } + if (apm->gain_control()->mode() != GainControl::kAdaptiveAnalog) { ASSERT_EQ(msg.level(), capture_level); } @@ -842,6 +870,14 @@ void void_main(int argc, char* argv[]) { vad_out_file)); } + if (ns_prob_file != NULL) { + 
ns_speech_prob = apm->noise_suppression()->speech_probability(); + ASSERT_EQ(1u, fwrite(&ns_speech_prob, + sizeof(ns_speech_prob), + 1, + ns_prob_file)); + } + if (apm->gain_control()->mode() != GainControl::kAdaptiveAnalog) { ASSERT_EQ(capture_level_in, capture_level); } diff --git a/src/modules/audio_processing/test/unit_test.cc b/src/modules/audio_processing/test/unit_test.cc index b000f43a7..3e28fb31a 100644 --- a/src/modules/audio_processing/test/unit_test.cc +++ b/src/modules/audio_processing/test/unit_test.cc @@ -1278,6 +1278,7 @@ TEST_F(ApmTest, Process) { int analog_level = 127; int analog_level_average = 0; int max_output_average = 0; + float ns_speech_prob_average = 0.0f; while (1) { if (!ReadFrame(far_file_, revframe_)) break; @@ -1314,6 +1315,8 @@ TEST_F(ApmTest, Process) { EXPECT_EQ(AudioFrame::kVadPassive, frame_->vad_activity_); } + ns_speech_prob_average += apm_->noise_suppression()->speech_probability(); + size_t frame_size = frame_->samples_per_channel_ * frame_->num_channels_; size_t write_count = fwrite(frame_->data_, sizeof(int16_t), @@ -1327,6 +1330,7 @@ TEST_F(ApmTest, Process) { } max_output_average /= frame_count; analog_level_average /= frame_count; + ns_speech_prob_average /= frame_count; #if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE) EchoCancellation::Metrics echo_metrics; @@ -1368,6 +1372,9 @@ TEST_F(ApmTest, Process) { EXPECT_EQ(reference_delay.std(), std); EXPECT_EQ(test->rms_level(), rms_level); + + EXPECT_FLOAT_EQ(test->ns_speech_probability_average(), + ns_speech_prob_average); #endif } else { test->set_has_echo_count(has_echo_count); @@ -1395,6 +1402,10 @@ TEST_F(ApmTest, Process) { message_delay->set_std(std); test->set_rms_level(rms_level); + + EXPECT_LE(0.0f, ns_speech_prob_average); + EXPECT_GE(1.0f, ns_speech_prob_average); + test->set_ns_speech_probability_average(ns_speech_prob_average); #endif } diff --git a/src/modules/audio_processing/test/unittest.proto b/src/modules/audio_processing/test/unittest.proto index 
67ba722b3..09ec94224 100644 --- a/src/modules/audio_processing/test/unittest.proto +++ b/src/modules/audio_processing/test/unittest.proto @@ -44,6 +44,9 @@ message Test { optional DelayMetrics delay_metrics = 12; optional int32 rms_level = 13; + + optional float ns_speech_probability_average = 14; + } message OutputData {