Refactoring vad_gmm.[c/h].
- Changed to stdint. - Replaced SHIFT macros. - Variable name changes. - Style changes. - Comments updates. - Added a unit test. Review URL: http://webrtc-codereview.appspot.com/323011 git-svn-id: http://webrtc.googlecode.com/svn/trunk@1249 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
		| @@ -8,68 +8,76 @@ | |||||||
|  *  be found in the AUTHORS file in the root of the source tree. |  *  be found in the AUTHORS file in the root of the source tree. | ||||||
|  */ |  */ | ||||||
|  |  | ||||||
|  |  | ||||||
| /* |  | ||||||
|  * This file includes the implementation of the internal VAD call |  | ||||||
|  * WebRtcVad_GaussianProbability. For function description, see vad_gmm.h. |  | ||||||
|  */ |  | ||||||
|  |  | ||||||
| #include "vad_gmm.h" | #include "vad_gmm.h" | ||||||
|  |  | ||||||
| #include "signal_processing_library.h" | #include "signal_processing_library.h" | ||||||
| #include "typedefs.h" | #include "typedefs.h" | ||||||
|  |  | ||||||
| static const WebRtc_Word32 kCompVar = 22005; | static const int32_t kCompVar = 22005; | ||||||
| // Constant log2(exp(1)) in Q12 | static const int16_t kLog2Exp = 5909;  // log2(exp(1)) in Q12. | ||||||
| static const WebRtc_Word16 kLog10Const = 5909; |  | ||||||
|  |  | ||||||
| WebRtc_Word32 WebRtcVad_GaussianProbability(WebRtc_Word16 in_sample, | // For a normal distribution, the probability of |input| is calculated and | ||||||
|                                             WebRtc_Word16 mean, | // returned (in Q20). The formula for normal distributed probability is | ||||||
|                                             WebRtc_Word16 std, | // | ||||||
|                                             WebRtc_Word16 *delta) | // 1 / s * exp(-(x - m)^2 / (2 * s^2)) | ||||||
| { | // | ||||||
|     WebRtc_Word16 tmp16, tmpDiv, tmpDiv2, expVal, tmp16_1, tmp16_2; | // where the parameters are given in the following Q domains: | ||||||
|     WebRtc_Word32 tmp32, y32; | // m = |mean| (Q7) | ||||||
|  | // s = |std| (Q7) | ||||||
|  | // x = |input| (Q4) | ||||||
|  | // in addition to the probability we output |delta| (in Q11) used when updating | ||||||
|  | // the noise/speech model. | ||||||
|  | int32_t WebRtcVad_GaussianProbability(int16_t input, | ||||||
|  |                                       int16_t mean, | ||||||
|  |                                       int16_t std, | ||||||
|  |                                       int16_t* delta) { | ||||||
|  |   int16_t tmp16, inv_std, inv_std2, exp_value = 0; | ||||||
|  |   int32_t tmp32; | ||||||
|  |  | ||||||
|     // Calculate tmpDiv=1/std, in Q10 |   // Calculate |inv_std| = 1 / s, in Q10. | ||||||
|     tmp32 = (WebRtc_Word32)WEBRTC_SPL_RSHIFT_W16(std,1) + (WebRtc_Word32)131072; // 1 in Q17 |   // 131072 = 1 in Q17, and (|std| >> 1) is for rounding instead of truncation. | ||||||
|     tmpDiv = (WebRtc_Word16)WebRtcSpl_DivW32W16(tmp32, std); // Q17/Q7 = Q10 |   // Q-domain: Q17 / Q7 = Q10. | ||||||
|  |   tmp32 = (int32_t) 131072 + (int32_t) (std >> 1); | ||||||
|  |   inv_std = (int16_t) WebRtcSpl_DivW32W16(tmp32, std); | ||||||
|  |  | ||||||
|     // Calculate tmpDiv2=1/std^2, in Q14 |   // Calculate |inv_std2| = 1 / s^2, in Q14. | ||||||
|     tmp16 = WEBRTC_SPL_RSHIFT_W16(tmpDiv, 2); // From Q10 to Q8 |   tmp16 = (inv_std >> 2);  // Q10 -> Q8. | ||||||
|     tmpDiv2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16, tmp16, 2); // (Q8 * Q8)>>2 = Q14 |   // Q-domain: (Q8 * Q8) >> 2 = Q14. | ||||||
|  |   inv_std2 = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(tmp16, tmp16, 2); | ||||||
|  |   // TODO(bjornv): Investigate if changing to | ||||||
|  |   // |inv_std2| = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(|inv_std|, |inv_std|, 6); | ||||||
|  |   // gives better accuracy. | ||||||
|  |  | ||||||
|     tmp16 = WEBRTC_SPL_LSHIFT_W16(in_sample, 3); // Q7 |   tmp16 = (input << 3);  // Q4 -> Q7 | ||||||
|   tmp16 = tmp16 - mean;  // Q7 - Q7 = Q7 |   tmp16 = tmp16 - mean;  // Q7 - Q7 = Q7 | ||||||
|  |  | ||||||
|     // To be used later, when updating noise/speech model |   // To be used later, when updating noise/speech model. | ||||||
|     // delta = (x-m)/std^2, in Q11 |   // |delta| = (x - m) / s^2, in Q11. | ||||||
|     *delta = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmpDiv2, tmp16, 10); //(Q14*Q7)>>10 = Q11 |   // Q-domain: (Q14 * Q7) >> 10 = Q11. | ||||||
|  |   *delta = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(inv_std2, tmp16, 10); | ||||||
|  |  | ||||||
|     // Calculate tmp32=(x-m)^2/(2*std^2), in Q10 |   // Calculate the exponent |tmp32| = (x - m)^2 / (2 * s^2), in Q10. Replacing | ||||||
|     tmp32 = (WebRtc_Word32)WEBRTC_SPL_MUL_16_16_RSFT(*delta, tmp16, 9); // One shift for /2 |   // division by two with one shift. | ||||||
|  |   // Q-domain: (Q11 * Q7) >> 8 = Q10. | ||||||
|  |   tmp32 = WEBRTC_SPL_MUL_16_16_RSFT(*delta, tmp16, 9); | ||||||
|  |  | ||||||
|     // Calculate expVal ~= exp(-(x-m)^2/(2*std^2)) ~= exp2(-log2(exp(1))*tmp32) |   // If the exponent is small enough to give a non-zero probability we calculate | ||||||
|     if (tmp32 < kCompVar) |   // |exp_value| ~= exp(-(x - m)^2 / (2 * s^2)) | ||||||
|     { |   //             ~= exp2(-log2(exp(1)) * |tmp32|). | ||||||
|         // Calculate tmp16 = log2(exp(1))*tmp32 , in Q10 |   if (tmp32 < kCompVar) { | ||||||
|         tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT((WebRtc_Word16)tmp32, |     // Calculate |tmp16| = log2(exp(1)) * |tmp32|, in Q10. | ||||||
|                                                          kLog10Const, 12); |     // Q-domain: (Q12 * Q10) >> 12 = Q10. | ||||||
|  |     tmp16 = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(kLog2Exp, (int16_t) tmp32, 12); | ||||||
|     tmp16 = -tmp16; |     tmp16 = -tmp16; | ||||||
|         tmp16_2 = (WebRtc_Word16)(0x0400 | (tmp16 & 0x03FF)); |     exp_value = (0x0400 | (tmp16 & 0x03FF)); | ||||||
|         tmp16_1 = (WebRtc_Word16)(tmp16 ^ 0xFFFF); |     tmp16 ^= 0xFFFF; | ||||||
|         tmp16 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W16(tmp16_1, 10); |     tmp16 >>= 10; | ||||||
|     tmp16 += 1; |     tmp16 += 1; | ||||||
|         // Calculate expVal=log2(-tmp32), in Q10 |     // Get |exp_value| = exp(-|tmp32|) in Q10. | ||||||
|         expVal = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32((WebRtc_Word32)tmp16_2, tmp16); |     exp_value >>= tmp16; | ||||||
|  |  | ||||||
|     } else |  | ||||||
|     { |  | ||||||
|         expVal = 0; |  | ||||||
|   } |   } | ||||||
|  |  | ||||||
|     // Calculate y32=(1/std)*exp(-(x-m)^2/(2*std^2)), in Q20 |   // Calculate and return (1 / s) * exp(-(x - m)^2 / (2 * s^2)), in Q20. | ||||||
|     y32 = WEBRTC_SPL_MUL_16_16(tmpDiv, expVal); // Q10 * Q10 = Q20 |   // Q-domain: Q10 * Q10 = Q20. | ||||||
|  |   return WEBRTC_SPL_MUL_16_16(inv_std, exp_value); | ||||||
|     return y32; // Q20 |  | ||||||
| } | } | ||||||
|   | |||||||
| @@ -8,40 +8,32 @@ | |||||||
|  *  be found in the AUTHORS file in the root of the source tree. |  *  be found in the AUTHORS file in the root of the source tree. | ||||||
|  */ |  */ | ||||||
|  |  | ||||||
|  | // Gaussian probability calculations internally used in vad_core.c. | ||||||
|  |  | ||||||
| /* | #ifndef WEBRTC_COMMON_AUDIO_VAD_VAD_GMM_H_ | ||||||
|  * This header file includes the description of the internal VAD call | #define WEBRTC_COMMON_AUDIO_VAD_VAD_GMM_H_ | ||||||
|  * WebRtcVad_GaussianProbability. |  | ||||||
|  */ |  | ||||||
|  |  | ||||||
| #ifndef WEBRTC_VAD_GMM_H_ |  | ||||||
| #define WEBRTC_VAD_GMM_H_ |  | ||||||
|  |  | ||||||
| #include "typedefs.h" | #include "typedefs.h" | ||||||
|  |  | ||||||
| /**************************************************************************** | // Calculates the probability for |input|, given that |input| comes from a | ||||||
|  * WebRtcVad_GaussianProbability(...) | // normal distribution with mean and standard deviation (|mean|, |std|). | ||||||
|  * | // | ||||||
|  * This function calculates the probability for the value 'in_sample', given that in_sample | // Inputs: | ||||||
|  * comes from a normal distribution with mean 'mean' and standard deviation 'std'. | //      - input         : input sample in Q4. | ||||||
|  * | //      - mean          : mean input in the statistical model, Q7. | ||||||
|  * Input: | //      - std           : standard deviation, Q7. | ||||||
|  *      - in_sample     : Input sample in Q4 | // | ||||||
|  *      - mean          : mean value in the statistical model, Q7 | // Output: | ||||||
|  *      - std           : standard deviation, Q7 | // | ||||||
|  * | //      - delta         : input used when updating the model, Q11. | ||||||
|  * Output: | //                        |delta| = (|input| - |mean|) / |std|^2. | ||||||
|  * | // | ||||||
|  *      - delta         : Value used when updating the model, Q11 | // Return: | ||||||
|  * | //   (probability for |input|) = | ||||||
|  * Return: | //    1 / |std| * exp(-(|input| - |mean|)^2 / (2 * |std|^2)); | ||||||
|  *      - out           : out = 1/std * exp(-(x-m)^2/(2*std^2)); | int32_t WebRtcVad_GaussianProbability(int16_t input, | ||||||
|  *                        Probability for x. |                                       int16_t mean, | ||||||
|  * |                                       int16_t std, | ||||||
|  */ |                                       int16_t* delta); | ||||||
| WebRtc_Word32 WebRtcVad_GaussianProbability(WebRtc_Word16 in_sample, |  | ||||||
|                                             WebRtc_Word16 mean, |  | ||||||
|                                             WebRtc_Word16 std, |  | ||||||
|                                             WebRtc_Word16 *delta); |  | ||||||
|  |  | ||||||
| #endif // WEBRTC_VAD_GMM_H_ | #endif  // WEBRTC_COMMON_AUDIO_VAD_VAD_GMM_H_ | ||||||
|   | |||||||
| @@ -15,6 +15,13 @@ | |||||||
| #include "typedefs.h" | #include "typedefs.h" | ||||||
| #include "webrtc_vad.h" | #include "webrtc_vad.h" | ||||||
|  |  | ||||||
|  | #ifdef __cplusplus | ||||||
|  | extern "C" | ||||||
|  | { | ||||||
|  | #include "vad_gmm.h" | ||||||
|  | } | ||||||
|  | #endif | ||||||
|  |  | ||||||
| namespace webrtc { | namespace webrtc { | ||||||
| namespace { | namespace { | ||||||
| const int16_t kModes[] = { 0, 1, 2, 3 }; | const int16_t kModes[] = { 0, 1, 2, 3 }; | ||||||
| @@ -152,6 +159,29 @@ TEST_F(VadTest, ApiTest) { | |||||||
|   EXPECT_EQ(0, WebRtcVad_Free(handle)); |   EXPECT_EQ(0, WebRtcVad_Free(handle)); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | TEST_F(VadTest, GMMTests) { | ||||||
|  |   int16_t delta = 0; | ||||||
|  |   // Input value at mean. | ||||||
|  |   EXPECT_EQ(1048576, WebRtcVad_GaussianProbability(0, 0, 128, &delta)); | ||||||
|  |   EXPECT_EQ(0, delta); | ||||||
|  |   EXPECT_EQ(1048576, WebRtcVad_GaussianProbability(16, 128, 128, &delta)); | ||||||
|  |   EXPECT_EQ(0, delta); | ||||||
|  |   EXPECT_EQ(1048576, WebRtcVad_GaussianProbability(-16, -128, 128, &delta)); | ||||||
|  |   EXPECT_EQ(0, delta); | ||||||
|  |  | ||||||
|  |   // Largest possible input to give non-zero probability. | ||||||
|  |   EXPECT_EQ(1024, WebRtcVad_GaussianProbability(59, 0, 128, &delta)); | ||||||
|  |   EXPECT_EQ(7552, delta); | ||||||
|  |   EXPECT_EQ(1024, WebRtcVad_GaussianProbability(75, 128, 128, &delta)); | ||||||
|  |   EXPECT_EQ(7552, delta); | ||||||
|  |   EXPECT_EQ(1024, WebRtcVad_GaussianProbability(-75, -128, 128, &delta)); | ||||||
|  |   EXPECT_EQ(-7552, delta); | ||||||
|  |  | ||||||
|  |   // Too large input, should give zero probability. | ||||||
|  |   EXPECT_EQ(0, WebRtcVad_GaussianProbability(105, 0, 128, &delta)); | ||||||
|  |   EXPECT_EQ(13440, delta); | ||||||
|  | } | ||||||
|  |  | ||||||
| // TODO(bjornv): Add a process test, run on file. | // TODO(bjornv): Add a process test, run on file. | ||||||
|  |  | ||||||
| }  // namespace | }  // namespace | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 bjornv@webrtc.org
					bjornv@webrtc.org