Added API to port internal speech probability in NS.
Identical with CL652007 that's already been accepted for commit. TBR=andrew@webrtc.org BUG=None TEST=None Review URL: https://webrtc-codereview.appspot.com/670009 git-svn-id: http://webrtc.googlecode.com/svn/trunk@2511 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
parent
6182db10c8
commit
08329f4a13
Binary file not shown.
@ -545,6 +545,11 @@ class NoiseSuppression {
|
|||||||
virtual int set_level(Level level) = 0;
|
virtual int set_level(Level level) = 0;
|
||||||
virtual Level level() const = 0;
|
virtual Level level() const = 0;
|
||||||
|
|
||||||
|
// Returns the internally computed prior speech probability of the current frame
|
||||||
|
// averaged over output channels. This is not supported in fixed point, for
|
||||||
|
// which |kUnsupportedFunctionError| is returned.
|
||||||
|
virtual float speech_probability() const = 0;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
virtual ~NoiseSuppression() {};
|
virtual ~NoiseSuppression() {};
|
||||||
};
|
};
|
||||||
|
@ -110,6 +110,20 @@ NoiseSuppression::Level NoiseSuppressionImpl::level() const {
|
|||||||
return level_;
|
return level_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the prior speech probability of the current frame, averaged over
// all noise suppression handles owned by this component.
//
// Float build (WEBRTC_NS_FLOAT): sums WebRtcNs_prior_speech_probability() over
// every handle and divides by the handle count. If there are no handles the
// function returns 0.0f rather than evaluating 0.0f / 0 (which would be NaN).
//
// Fixed point build (WEBRTC_NS_FIXED): not supported; the error code
// kUnsupportedFunctionError is returned (implicitly converted to float).
//
// NOTE(review): WebRtcNs_prior_speech_probability() reports -1 for a NULL or
// uninitialized instance; such a value would be folded into the average here.
float NoiseSuppressionImpl::speech_probability() const {
#if defined(WEBRTC_NS_FLOAT)
  const int handle_count = num_handles();
  if (handle_count <= 0) {
    // Nothing to average; avoid dividing by zero.
    return 0.0f;
  }

  float probability_sum = 0.0f;
  for (int i = 0; i < handle_count; i++) {
    Handle* my_handle = static_cast<Handle*>(handle(i));
    probability_sum += WebRtcNs_prior_speech_probability(my_handle);
  }
  return probability_sum / handle_count;
#elif defined(WEBRTC_NS_FIXED)
  // Currently not available for the fixed point implementation.
  return apm_->kUnsupportedFunctionError;
#endif
}
|
||||||
|
|
||||||
void* NoiseSuppressionImpl::CreateHandle() const {
|
void* NoiseSuppressionImpl::CreateHandle() const {
|
||||||
Handle* handle = NULL;
|
Handle* handle = NULL;
|
||||||
#if defined(WEBRTC_NS_FLOAT)
|
#if defined(WEBRTC_NS_FLOAT)
|
||||||
|
@ -28,6 +28,7 @@ class NoiseSuppressionImpl : public NoiseSuppression,
|
|||||||
|
|
||||||
// NoiseSuppression implementation.
|
// NoiseSuppression implementation.
|
||||||
virtual bool is_enabled() const;
|
virtual bool is_enabled() const;
|
||||||
|
float speech_probability() const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// NoiseSuppression implementation.
|
// NoiseSuppression implementation.
|
||||||
|
@ -20,14 +20,14 @@ extern "C" {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This function creates an instance to the noise reduction structure
|
* This function creates an instance to the noise suppression structure
|
||||||
*
|
*
|
||||||
* Input:
|
* Input:
|
||||||
* - NS_inst : Pointer to noise reduction instance that should be
|
* - NS_inst : Pointer to noise suppression instance that should be
|
||||||
* created
|
* created
|
||||||
*
|
*
|
||||||
* Output:
|
* Output:
|
||||||
* - NS_inst : Pointer to created noise reduction instance
|
* - NS_inst : Pointer to created noise suppression instance
|
||||||
*
|
*
|
||||||
* Return value : 0 - Ok
|
* Return value : 0 - Ok
|
||||||
* -1 - Error
|
* -1 - Error
|
||||||
@ -36,7 +36,7 @@ int WebRtcNs_Create(NsHandle** NS_inst);
|
|||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This function frees the dynamic memory of a specified Noise Reduction
|
* This function frees the dynamic memory of a specified noise suppression
|
||||||
* instance.
|
* instance.
|
||||||
*
|
*
|
||||||
* Input:
|
* Input:
|
||||||
@ -49,7 +49,8 @@ int WebRtcNs_Free(NsHandle* NS_inst);
|
|||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This function initializes a NS instance
|
* This function initializes a NS instance and has to be called before any other
|
||||||
|
* processing is made.
|
||||||
*
|
*
|
||||||
* Input:
|
* Input:
|
||||||
* - NS_inst : Instance that should be initialized
|
* - NS_inst : Instance that should be initialized
|
||||||
@ -67,11 +68,11 @@ int WebRtcNs_Init(NsHandle* NS_inst, WebRtc_UWord32 fs);
|
|||||||
* This changes the aggressiveness of the noise suppression method.
|
* This changes the aggressiveness of the noise suppression method.
|
||||||
*
|
*
|
||||||
* Input:
|
* Input:
|
||||||
* - NS_inst : Instance that should be initialized
|
* - NS_inst : Noise suppression instance.
|
||||||
* - mode : 0: Mild, 1: Medium , 2: Aggressive
|
* - mode : 0: Mild, 1: Medium , 2: Aggressive
|
||||||
*
|
*
|
||||||
* Output:
|
* Output:
|
||||||
* - NS_inst : Initialized instance
|
* - NS_inst : Updated instance.
|
||||||
*
|
*
|
||||||
* Return value : 0 - Ok
|
* Return value : 0 - Ok
|
||||||
* -1 - Error
|
* -1 - Error
|
||||||
@ -84,7 +85,7 @@ int WebRtcNs_set_policy(NsHandle* NS_inst, int mode);
|
|||||||
* input and output signals should always be 10ms (80 or 160 samples).
|
* input and output signals should always be 10ms (80 or 160 samples).
|
||||||
*
|
*
|
||||||
* Input
|
* Input
|
||||||
* - NS_inst : NS Instance. Needs to be initiated before call.
|
* - NS_inst : Noise suppression instance.
|
||||||
* - spframe : Pointer to speech frame buffer for L band
|
* - spframe : Pointer to speech frame buffer for L band
|
||||||
* - spframe_H : Pointer to speech frame buffer for H band
|
* - spframe_H : Pointer to speech frame buffer for H band
|
||||||
* - fs : sampling frequency
|
* - fs : sampling frequency
|
||||||
@ -103,6 +104,18 @@ int WebRtcNs_Process(NsHandle* NS_inst,
|
|||||||
short* outframe,
|
short* outframe,
|
||||||
short* outframe_H);
|
short* outframe_H);
|
||||||
|
|
||||||
|
/* Returns the internally used prior speech probability of the current frame.
|
||||||
|
* There is a frequency bin based one as well, with which this should not be
|
||||||
|
* confused.
|
||||||
|
*
|
||||||
|
* Input
|
||||||
|
* - handle : Noise suppression instance.
|
||||||
|
*
|
||||||
|
* Return value : Prior speech probability in interval [0.0, 1.0].
|
||||||
|
* -1 - NULL pointer or uninitialized instance.
|
||||||
|
*/
|
||||||
|
float WebRtcNs_prior_speech_probability(NsHandle* handle);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -46,3 +46,14 @@ int WebRtcNs_Process(NsHandle* NS_inst, short* spframe, short* spframe_H,
|
|||||||
return WebRtcNs_ProcessCore(
|
return WebRtcNs_ProcessCore(
|
||||||
(NSinst_t*) NS_inst, spframe, spframe_H, outframe, outframe_H);
|
(NSinst_t*) NS_inst, spframe, spframe_H, outframe, outframe_H);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Reads the prior speech probability stored in the given noise suppression
 * instance. Returns -1 when |handle| is NULL or when the instance has not
 * been initialized (its initFlag is 0). */
float WebRtcNs_prior_speech_probability(NsHandle* handle) {
  NSinst_t* instance = (NSinst_t*) handle;

  /* The NULL test comes first so that initFlag is never read through a NULL
   * pointer. */
  if (handle == NULL || instance->initFlag == 0) {
    return -1;
  }
  return instance->priorSpeechProb;
}
|
||||||
|
@ -115,6 +115,7 @@ void usage() {
|
|||||||
printf(" --ns_moderate\n");
|
printf(" --ns_moderate\n");
|
||||||
printf(" --ns_high\n");
|
printf(" --ns_high\n");
|
||||||
printf(" --ns_very_high\n");
|
printf(" --ns_very_high\n");
|
||||||
|
printf(" --ns_prob_file FILE\n");
|
||||||
printf("\n -vad Voice activity detection\n");
|
printf("\n -vad Voice activity detection\n");
|
||||||
printf(" --vad_out_file FILE\n");
|
printf(" --vad_out_file FILE\n");
|
||||||
printf("\n Level metrics (enabled by default)\n");
|
printf("\n Level metrics (enabled by default)\n");
|
||||||
@ -149,6 +150,7 @@ void void_main(int argc, char* argv[]) {
|
|||||||
const char* near_filename = NULL;
|
const char* near_filename = NULL;
|
||||||
const char* out_filename = NULL;
|
const char* out_filename = NULL;
|
||||||
const char* vad_out_filename = NULL;
|
const char* vad_out_filename = NULL;
|
||||||
|
const char* ns_prob_filename = NULL;
|
||||||
const char* aecm_echo_path_in_filename = NULL;
|
const char* aecm_echo_path_in_filename = NULL;
|
||||||
const char* aecm_echo_path_out_filename = NULL;
|
const char* aecm_echo_path_out_filename = NULL;
|
||||||
|
|
||||||
@ -336,6 +338,11 @@ void void_main(int argc, char* argv[]) {
|
|||||||
ASSERT_EQ(apm->kNoError,
|
ASSERT_EQ(apm->kNoError,
|
||||||
apm->noise_suppression()->set_level(NoiseSuppression::kVeryHigh));
|
apm->noise_suppression()->set_level(NoiseSuppression::kVeryHigh));
|
||||||
|
|
||||||
|
} else if (strcmp(argv[i], "--ns_prob_file") == 0) {
|
||||||
|
i++;
|
||||||
|
ASSERT_LT(i, argc) << "Specify filename after --ns_prob_file";
|
||||||
|
ns_prob_filename = argv[i];
|
||||||
|
|
||||||
} else if (strcmp(argv[i], "-vad") == 0) {
|
} else if (strcmp(argv[i], "-vad") == 0) {
|
||||||
ASSERT_EQ(apm->kNoError, apm->voice_detection()->Enable(true));
|
ASSERT_EQ(apm->kNoError, apm->voice_detection()->Enable(true));
|
||||||
|
|
||||||
@ -390,6 +397,7 @@ void void_main(int argc, char* argv[]) {
|
|||||||
const char delay_filename[] = "apm_delay.dat";
|
const char delay_filename[] = "apm_delay.dat";
|
||||||
const char drift_filename[] = "apm_drift.dat";
|
const char drift_filename[] = "apm_drift.dat";
|
||||||
const char vad_file_default[] = "vad_out.dat";
|
const char vad_file_default[] = "vad_out.dat";
|
||||||
|
const char ns_prob_file_default[] = "ns_prob.dat";
|
||||||
|
|
||||||
if (!simulating) {
|
if (!simulating) {
|
||||||
far_filename = far_file_default;
|
far_filename = far_file_default;
|
||||||
@ -404,6 +412,10 @@ void void_main(int argc, char* argv[]) {
|
|||||||
vad_out_filename = vad_file_default;
|
vad_out_filename = vad_file_default;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!ns_prob_filename) {
|
||||||
|
ns_prob_filename = ns_prob_file_default;
|
||||||
|
}
|
||||||
|
|
||||||
FILE* pb_file = NULL;
|
FILE* pb_file = NULL;
|
||||||
FILE* far_file = NULL;
|
FILE* far_file = NULL;
|
||||||
FILE* near_file = NULL;
|
FILE* near_file = NULL;
|
||||||
@ -412,6 +424,7 @@ void void_main(int argc, char* argv[]) {
|
|||||||
FILE* delay_file = NULL;
|
FILE* delay_file = NULL;
|
||||||
FILE* drift_file = NULL;
|
FILE* drift_file = NULL;
|
||||||
FILE* vad_out_file = NULL;
|
FILE* vad_out_file = NULL;
|
||||||
|
FILE* ns_prob_file = NULL;
|
||||||
FILE* aecm_echo_path_in_file = NULL;
|
FILE* aecm_echo_path_in_file = NULL;
|
||||||
FILE* aecm_echo_path_out_file = NULL;
|
FILE* aecm_echo_path_out_file = NULL;
|
||||||
|
|
||||||
@ -466,6 +479,12 @@ void void_main(int argc, char* argv[]) {
|
|||||||
<< vad_out_file;
|
<< vad_out_file;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (apm->noise_suppression()->is_enabled()) {
  // Open the NS speech probability output file for binary writing.
  ns_prob_file = fopen(ns_prob_filename, "wb");
  // On failure report the file name; the FILE* itself is NULL at that point
  // and carries no useful information.
  ASSERT_TRUE(NULL != ns_prob_file) << "Unable to open NS output file "
                                    << ns_prob_filename;
}
|
||||||
|
|
||||||
if (aecm_echo_path_in_filename != NULL) {
|
if (aecm_echo_path_in_filename != NULL) {
|
||||||
aecm_echo_path_in_file = fopen(aecm_echo_path_in_filename, "rb");
|
aecm_echo_path_in_file = fopen(aecm_echo_path_in_filename, "rb");
|
||||||
ASSERT_TRUE(NULL != aecm_echo_path_in_file) << "Unable to open file "
|
ASSERT_TRUE(NULL != aecm_echo_path_in_file) << "Unable to open file "
|
||||||
@ -504,6 +523,7 @@ void void_main(int argc, char* argv[]) {
|
|||||||
int drift_samples = 0;
|
int drift_samples = 0;
|
||||||
int capture_level = 127;
|
int capture_level = 127;
|
||||||
int8_t stream_has_voice = 0;
|
int8_t stream_has_voice = 0;
|
||||||
|
float ns_speech_prob = 0.0f;
|
||||||
|
|
||||||
TickTime t0 = TickTime::Now();
|
TickTime t0 = TickTime::Now();
|
||||||
TickTime t1 = t0;
|
TickTime t1 = t0;
|
||||||
@ -643,6 +663,14 @@ void void_main(int argc, char* argv[]) {
|
|||||||
vad_out_file));
|
vad_out_file));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (ns_prob_file != NULL) {
|
||||||
|
ns_speech_prob = apm->noise_suppression()->speech_probability();
|
||||||
|
ASSERT_EQ(1u, fwrite(&ns_speech_prob,
|
||||||
|
sizeof(ns_speech_prob),
|
||||||
|
1,
|
||||||
|
ns_prob_file));
|
||||||
|
}
|
||||||
|
|
||||||
if (apm->gain_control()->mode() != GainControl::kAdaptiveAnalog) {
|
if (apm->gain_control()->mode() != GainControl::kAdaptiveAnalog) {
|
||||||
ASSERT_EQ(msg.level(), capture_level);
|
ASSERT_EQ(msg.level(), capture_level);
|
||||||
}
|
}
|
||||||
@ -842,6 +870,14 @@ void void_main(int argc, char* argv[]) {
|
|||||||
vad_out_file));
|
vad_out_file));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (ns_prob_file != NULL) {
|
||||||
|
ns_speech_prob = apm->noise_suppression()->speech_probability();
|
||||||
|
ASSERT_EQ(1u, fwrite(&ns_speech_prob,
|
||||||
|
sizeof(ns_speech_prob),
|
||||||
|
1,
|
||||||
|
ns_prob_file));
|
||||||
|
}
|
||||||
|
|
||||||
if (apm->gain_control()->mode() != GainControl::kAdaptiveAnalog) {
|
if (apm->gain_control()->mode() != GainControl::kAdaptiveAnalog) {
|
||||||
ASSERT_EQ(capture_level_in, capture_level);
|
ASSERT_EQ(capture_level_in, capture_level);
|
||||||
}
|
}
|
||||||
|
@ -1278,6 +1278,7 @@ TEST_F(ApmTest, Process) {
|
|||||||
int analog_level = 127;
|
int analog_level = 127;
|
||||||
int analog_level_average = 0;
|
int analog_level_average = 0;
|
||||||
int max_output_average = 0;
|
int max_output_average = 0;
|
||||||
|
float ns_speech_prob_average = 0.0f;
|
||||||
|
|
||||||
while (1) {
|
while (1) {
|
||||||
if (!ReadFrame(far_file_, revframe_)) break;
|
if (!ReadFrame(far_file_, revframe_)) break;
|
||||||
@ -1314,6 +1315,8 @@ TEST_F(ApmTest, Process) {
|
|||||||
EXPECT_EQ(AudioFrame::kVadPassive, frame_->vad_activity_);
|
EXPECT_EQ(AudioFrame::kVadPassive, frame_->vad_activity_);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ns_speech_prob_average += apm_->noise_suppression()->speech_probability();
|
||||||
|
|
||||||
size_t frame_size = frame_->samples_per_channel_ * frame_->num_channels_;
|
size_t frame_size = frame_->samples_per_channel_ * frame_->num_channels_;
|
||||||
size_t write_count = fwrite(frame_->data_,
|
size_t write_count = fwrite(frame_->data_,
|
||||||
sizeof(int16_t),
|
sizeof(int16_t),
|
||||||
@ -1327,6 +1330,7 @@ TEST_F(ApmTest, Process) {
|
|||||||
}
|
}
|
||||||
max_output_average /= frame_count;
|
max_output_average /= frame_count;
|
||||||
analog_level_average /= frame_count;
|
analog_level_average /= frame_count;
|
||||||
|
ns_speech_prob_average /= frame_count;
|
||||||
|
|
||||||
#if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
|
#if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
|
||||||
EchoCancellation::Metrics echo_metrics;
|
EchoCancellation::Metrics echo_metrics;
|
||||||
@ -1368,6 +1372,9 @@ TEST_F(ApmTest, Process) {
|
|||||||
EXPECT_EQ(reference_delay.std(), std);
|
EXPECT_EQ(reference_delay.std(), std);
|
||||||
|
|
||||||
EXPECT_EQ(test->rms_level(), rms_level);
|
EXPECT_EQ(test->rms_level(), rms_level);
|
||||||
|
|
||||||
|
EXPECT_FLOAT_EQ(test->ns_speech_probability_average(),
|
||||||
|
ns_speech_prob_average);
|
||||||
#endif
|
#endif
|
||||||
} else {
|
} else {
|
||||||
test->set_has_echo_count(has_echo_count);
|
test->set_has_echo_count(has_echo_count);
|
||||||
@ -1395,6 +1402,10 @@ TEST_F(ApmTest, Process) {
|
|||||||
message_delay->set_std(std);
|
message_delay->set_std(std);
|
||||||
|
|
||||||
test->set_rms_level(rms_level);
|
test->set_rms_level(rms_level);
|
||||||
|
|
||||||
|
EXPECT_LE(0.0f, ns_speech_prob_average);
|
||||||
|
EXPECT_GE(1.0f, ns_speech_prob_average);
|
||||||
|
test->set_ns_speech_probability_average(ns_speech_prob_average);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -44,6 +44,9 @@ message Test {
|
|||||||
optional DelayMetrics delay_metrics = 12;
|
optional DelayMetrics delay_metrics = 12;
|
||||||
|
|
||||||
optional int32 rms_level = 13;
|
optional int32 rms_level = 13;
|
||||||
|
|
||||||
|
optional float ns_speech_probability_average = 14;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
message OutputData {
|
message OutputData {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user