VAD Refactoring: WebRtcVad_Process()

Code style: indentation, braces. Tested with trybot, vad_unittests, audioproc_unittest. BUG=None TEST=None Review URL: https://webrtc-codereview.appspot.com/579012 git-svn-id: http://webrtc.googlecode.com/svn/trunk@2396 4adac7df-926f-26a2-2b94-8c16560cd09d
2012-06-12 08:19:24 +00:00 · 2012-06-12 08:19:24 +00:00 · b1c3276f5a
commit b1c3276f5a
parent 5f9f1db12a
4 changed files with 107 additions and 88 deletions
--- a/src/common_audio/vad/include/webrtc_vad.h
+++ b/src/common_audio/vad/include/webrtc_vad.h
@ -13,12 +13,10 @@
 * This header file includes the VAD API calls. Specific function calls are given below.
 */

-#ifndef WEBRTC_COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_
+#ifndef WEBRTC_COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_  // NOLINT
 #define WEBRTC_COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_

-#include <stdlib.h>
-
-#include "typedefs.h"
+#include "typedefs.h"  // NOLINT

 typedef struct WebRtcVadInst VadInst;

@ -61,29 +59,32 @@ int WebRtcVad_Init(VadInst* handle);
 //                       has not been initialized).
 int WebRtcVad_set_mode(VadInst* handle, int mode);

-/****************************************************************************
- * WebRtcVad_Process(...)
- * 
- * This functions does a VAD for the inserted speech frame
- *
- * Input
- *        - vad_inst     : VAD Instance. Needs to be initiated before call.
- *        - fs           : sampling frequency (Hz): 8000, 16000, or 32000
- *        - speech_frame : Pointer to speech frame buffer
- *        - frame_length : Length of speech frame buffer in number of samples
- *
- * Output:
- *        - vad_inst     : Updated VAD instance
- *
- * Return value          :  1 - Active Voice
- *                          0 - Non-active Voice
- *                         -1 - Error
- */
-int16_t WebRtcVad_Process(VadInst* vad_inst, int16_t fs, int16_t* speech_frame,
+// Calculates a VAD decision for the |audio_frame|. For valid sampling rates
+// and frame lengths, see WebRtcVad_ValidRateAndFrameLength() below.
+//
+// - handle       [i/o] : VAD Instance. Needs to be initialized by
+//                        WebRtcVad_Init() before call.
+// - fs           [i]   : Sampling frequency (Hz): 8000, 16000, or 32000
+// - audio_frame  [i]   : Audio frame buffer.
+// - frame_length [i]   : Length of audio frame buffer in number of samples.
+//
+// returns              : 1 - (Active Voice),
+//                        0 - (Non-active Voice),
+//                       -1 - (Error)
+int16_t WebRtcVad_Process(VadInst* handle, int16_t fs, int16_t* audio_frame,
                          int16_t frame_length);

+// Checks for valid combinations of |rate| and |frame_length|. We support 10,
+// 20 and 30 ms frames and the rates 8000, 16000 and 32000 Hz.
+//
+// - rate         [i] : Sampling frequency (Hz).
+// - frame_length [i] : Speech frame buffer length in number of samples.
+//
+// returns            : 0 - (valid combination), -1 - (invalid combination)
+int WebRtcVad_ValidRateAndFrameLength(int rate, int frame_length);
+
 #ifdef __cplusplus
 }
 #endif

-#endif  // WEBRTC_COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_
+#endif  // WEBRTC_COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_  // NOLINT
--- a/src/common_audio/vad/vad_unittest.cc
+++ b/src/common_audio/vad/vad_unittest.cc
@ -12,10 +12,11 @@

 #include <stdlib.h>

-#include "common_audio/signal_processing/include/signal_processing_library.h"
 #include "gtest/gtest.h"
+
+#include "common_audio/signal_processing/include/signal_processing_library.h"
+#include "common_audio/vad/include/webrtc_vad.h"
 #include "typedefs.h"
-#include "webrtc_vad.h"

 VadTest::VadTest() {}

@ -119,6 +120,22 @@ TEST_F(VadTest, ApiTest) {
  EXPECT_EQ(0, WebRtcVad_Free(handle));
 }

+TEST_F(VadTest, ValidRatesFrameLengths) {
+  // This test verifies valid and invalid rate/frame_length combinations. We
+  // loop through sampling rates and frame lengths from negative values to
+  // values larger than possible.
+  for (int16_t rate = -1; rate <= kRates[kRatesSize - 1] + 1; rate++) {
+    for (int16_t frame_length = -1; frame_length <= kMaxFrameLength + 1;
+        frame_length++) {
+      if (ValidRatesAndFrameLengths(rate, frame_length)) {
+        EXPECT_EQ(0, WebRtcVad_ValidRateAndFrameLength(rate, frame_length));
+      } else {
+        EXPECT_EQ(-1, WebRtcVad_ValidRateAndFrameLength(rate, frame_length));
+      }
+    }
+  }
+}
+
 // TODO(bjornv): Add a process test, run on file.

 }  // namespace
--- a/src/common_audio/vad/vad_unittest.h
+++ b/src/common_audio/vad/vad_unittest.h
@ -8,12 +8,13 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

-#ifndef WEBRTC_COMMONT_AUDIO_VAD_VAD_UNIT_TESTS_H
-#define WEBRTC_COMMONT_AUDIO_VAD_VAD_UNIT_TESTS_H
+#ifndef WEBRTC_COMMON_AUDIO_VAD_VAD_UNITTEST_H
+#define WEBRTC_COMMON_AUDIO_VAD_VAD_UNITTEST_H

 #include <stddef.h>  // size_t

 #include "gtest/gtest.h"
+
 #include "typedefs.h"

 namespace {
@ -44,4 +45,4 @@ class VadTest : public ::testing::Test {
  bool ValidRatesAndFrameLengths(int16_t rate, int16_t frame_length);
 };

-#endif  // WEBRTC_COMMONT_AUDIO_VAD_VAD_UNIT_TESTS_H
+#endif  // WEBRTC_COMMON_AUDIO_VAD_VAD_UNITTEST_H
--- a/src/common_audio/vad/webrtc_vad.c
+++ b/src/common_audio/vad/webrtc_vad.c
@ -8,13 +8,18 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

+#include "common_audio/vad/include/webrtc_vad.h"
+
 #include <stdlib.h>
 #include <string.h>

-#include "webrtc_vad.h"
-#include "vad_core.h"
+#include "common_audio/vad/vad_core.h"
+#include "typedefs.h"

 static const int kInitCheck = 42;
+static const int kValidRates[] = { 8000, 16000, 32000 };
+static const size_t kRatesSize = sizeof(kValidRates) / sizeof(*kValidRates);
+static const int kMaxFrameLengthMs = 30;

 int WebRtcVad_Create(VadInst** handle) {
  VadInstT* self = NULL;
@ -66,65 +71,60 @@ int WebRtcVad_set_mode(VadInst* handle, int mode) {
  return WebRtcVad_set_mode_core(self, mode);
 }

-int16_t WebRtcVad_Process(VadInst* vad_inst, int16_t fs, int16_t* speech_frame,
-                          int16_t frame_length)
-{
-    int16_t vad;
-    VadInstT* vad_ptr;
+int16_t WebRtcVad_Process(VadInst* handle, int16_t fs, int16_t* audio_frame,
+                          int16_t frame_length) {
+  int16_t vad = -1;
+  VadInstT* self = (VadInstT*) handle;

-    if (vad_inst == NULL)
-    {
-        return -1;
-    }
+  if (handle == NULL) {
+    return -1;
+  }

-    vad_ptr = (VadInstT*)vad_inst;
-    if (vad_ptr->init_flag != kInitCheck)
-    {
-        return -1;
-    }
+  if (self->init_flag != kInitCheck) {
+    return -1;
+  }
+  if (audio_frame == NULL) {
+    return -1;
+  }
+  if (WebRtcVad_ValidRateAndFrameLength(fs, frame_length) != 0) {
+    return -1;
+  }

-    if (speech_frame == NULL)
-    {
-        return -1;
-    }
+  if (fs == 32000) {
+    vad = WebRtcVad_CalcVad32khz(self, audio_frame, frame_length);
+  } else if (fs == 16000) {
+    vad = WebRtcVad_CalcVad16khz(self, audio_frame, frame_length);
+  } else if (fs == 8000) {
+    vad = WebRtcVad_CalcVad8khz(self, audio_frame, frame_length);
+  }

-    if (fs == 32000)
-    {
-        if ((frame_length != 320) && (frame_length != 640) && (frame_length != 960))
-        {
-            return -1;
-        }
-        vad = WebRtcVad_CalcVad32khz((VadInstT*)vad_inst, speech_frame, frame_length);
-
-    } else if (fs == 16000)
-    {
-        if ((frame_length != 160) && (frame_length != 320) && (frame_length != 480))
-        {
-            return -1;
-        }
-        vad = WebRtcVad_CalcVad16khz((VadInstT*)vad_inst, speech_frame, frame_length);
-
-    } else if (fs == 8000)
-    {
-        if ((frame_length != 80) && (frame_length != 160) && (frame_length != 240))
-        {
-            return -1;
-        }
-        vad = WebRtcVad_CalcVad8khz((VadInstT*)vad_inst, speech_frame, frame_length);
-
-    } else
-    {
-        return -1; // Not a supported sampling frequency
-    }
-
-    if (vad > 0)
-    {
-        return 1;
-    } else if (vad == 0)
-    {
-        return 0;
-    } else
-    {
-        return -1;
-    }
+  if (vad > 0) {
+    vad = 1;
+  }
+  return vad;
+}
+
+int WebRtcVad_ValidRateAndFrameLength(int rate, int frame_length) {
+  int return_value = -1;
+  size_t i;
+  int valid_length_ms;
+  int valid_length;
+
+  // We only allow 10, 20 or 30 ms frames. Loop through valid sampling rates
+  // and see if |rate| and |frame_length| form a matching pair.
+  for (i = 0; i < kRatesSize; i++) {
+    if (kValidRates[i] == rate) {
+      for (valid_length_ms = 10; valid_length_ms <= kMaxFrameLengthMs;
+          valid_length_ms += 10) {
+        valid_length = (kValidRates[i] / 1000 * valid_length_ms);
+        if (frame_length == valid_length) {
+          return_value = 0;
+          break;
+        }
+      }
+      break;
+    }
+  }
+
+  return return_value;
 }