[DEV] add a simple VAD with speex library

2019-04-01 22:09:41 +02:00 · 2019-04-01 22:09:41 +02:00 · 4b04365f3f
commit 4b04365f3f
parent 44911d3616
4 changed files with 187 additions and 4 deletions
--- a/audio/algo/speex/Resampler.cpp
+++ b/audio/algo/speex/Resampler.cpp
@ -78,6 +78,7 @@ namespace audio {
 								break;
 							case audio::format_float:
 								{
+									AA_SPEEX_ERROR("RESAMPLE: " << _nbChunk << " ==> " << _nbChunkOut);
 									uint32_t nbChunkInput = _nbChunk;
 									uint32_t nbChunkOutput = _nbChunkOut;
 									int ret = speex_resampler_process_interleaved_float(m_speexResampler,
@ -85,6 +86,7 @@ namespace audio {
 									                                                    &nbChunkInput,
 									                                                    reinterpret_cast<float*>(_output),
 									                                                    &nbChunkOutput);
+									AA_SPEEX_ERROR("RESAMPLE: " << nbChunkInput << " ==> " << nbChunkOutput << " DONE");
 									// Check all input and output ...
 									if (nbChunkInput != _nbChunk) {
 										AA_SPEEX_ERROR("inputSize (not all read ...) proceed=" << nbChunkInput << " requested=" << _nbChunk);
@ -132,7 +134,7 @@ etk::Vector<enum audio::format> audio::algo::speex::Resampler::getSupportedForma

 etk::Vector<enum audio::format> audio::algo::speex::Resampler::getNativeSupportedFormat() {
 	etk::Vector<enum audio::format> out;
-	out.pushBack(audio::format_float);
+	//out.pushBack(audio::format_float); ==> speex-dsp is compiled either in fixed-point or in float, not both at the same time
 	out.pushBack(audio::format_int16);
 	return out;
 }
--- a/audio/algo/speex/Vad.cpp
+++ b/audio/algo/speex/Vad.cpp
@ -0,0 +1,114 @@
+/** @file
+ * @author Edouard DUPIN 
+ * @copyright 2011, Edouard DUPIN, all right reserved
+ * @license APACHE v2.0 (see license file)
+ */
+
+#define speex_POSIX 1
+#include <audio/algo/speex/Vad.hpp>
+#include <audio/algo/speex/debug.hpp>
+#include <cmath>
+
+#include <webrtc/common_audio/vad/include/webrtc_vad.h>
+
+namespace audio {
+	namespace algo {
+		namespace speex {
+			/**
+			 * @brief Private implementation of the VAD: owns the WebRTC VAD handle and
+			 *        keeps the result of the last processed frame.
+			 */
+			class VadPrivate {
+				private:
+					VadInst *m_vad = null; //!< WebRTC VAD handle (null when creation or initialization failed).
+					bool m_voiceDetected = false; //!< Result of the last successful call to process().
+					/**
+					 * @brief Placeholder, not implemented yet: always returns 0.
+					 */
+					uint32_t calculateBlockSize(uint32_t _sampleRate, uint32_t _sizeInMs) {
+						return 0;
+					}
+				public:
+					/**
+					 * @brief Create and initialize the WebRTC VAD handle.
+					 * @note On failure the handle is left to null and process() will refuse to work.
+					 */
+					VadPrivate() {
+						if (WebRtcVad_Create(&m_vad) != 0) {
+							AA_SPEEX_ERROR("Can not create the VAD handle");
+							m_vad = null;
+							return;
+						}
+						if (WebRtcVad_Init(m_vad) != 0) {
+							AA_SPEEX_ERROR("Can not initialize the VAD handle");
+							WebRtcVad_Free(m_vad);
+							m_vad = null;
+						}
+					}
+					/**
+					 * @brief Release the WebRTC VAD handle (it was never freed before ==> leak).
+					 */
+					~VadPrivate() {
+						if (m_vad != null) {
+							WebRtcVad_Free(m_vad);
+							m_vad = null;
+						}
+					}
+					// The object owns a raw C handle: copying it would free the handle twice.
+					VadPrivate(const VadPrivate&) = delete;
+					VadPrivate& operator=(const VadPrivate&) = delete;
+					
+					/**
+					 * @brief Run the voice detection on one frame and store the result.
+					 * @param[in] _input Input data, read as int16 samples at 16000 Hz.
+					 * @param[in] _nbChunk Number of sample in the input buffer.
+					 */
+					void process(const void* _input, size_t _nbChunk) {
+						if (m_vad == null) {
+							AA_SPEEX_ERROR("speex handle is not initialize ==> can not process");
+							return;
+						}
+						// WebRtcVad_Process returns 1 (active voice), 0 (non-active voice) or -1 (error).
+						// Storing the raw value in a bool would report an error as "voice detected".
+						int ret = WebRtcVad_Process(m_vad, 16000, static_cast<const int16_t*>(_input), _nbChunk);
+						if (ret < 0) {
+							AA_SPEEX_ERROR("VAD process error: nbChunk=" << _nbChunk << " (need a frame of 10, 20 or 30 ms)");
+							m_voiceDetected = false;
+							return;
+						}
+						m_voiceDetected = (ret == 1);
+					}
+					
+					/**
+					 * @brief Get the preferred number of sample per call to process().
+					 * @return 160 sample (10 ms at 16000 Hz).
+					 */
+					int32_t getOptimalFrameSize() {
+						return 160;
+					}
+					
+					/**
+					 * @brief Get the result of the last processed frame.
+					 * @return true if voice was detected, false otherwise.
+					 */
+					bool getVoiceDetected() {
+						return m_voiceDetected;
+					}
+			};
+		}
+	}
+}
+
+
+/**
+ * @brief (Re)create the private VAD instance.
+ * @param[in] _nbChannel Number of channel in the stream (not used here).
+ * @param[in] _sampleRate Sample rate (not used here).
+ * @param[in] _format Data format (not used here).
+ */
+void audio::algo::speex::Vad::init(int8_t _nbChannel, float _sampleRate, enum audio::format _format) {
+	// Drop the previous instance before building the new one.
+	m_private.reset();
+	ememory::SharedPtr<audio::algo::speex::VadPrivate> freshInstance = ememory::makeShared<audio::algo::speex::VadPrivate>();
+	m_private = freshInstance;
+}
+
+/**
+ * @brief List the sample rates accepted by the VAD.
+ * @return A list holding the single value 16000.
+ */
+etk::Vector<float> audio::algo::speex::Vad::getSupportedSampleRate() {
+	etk::Vector<float> rates;
+	rates.pushBack(16000);
+	return rates;
+}
+/**
+ * @brief List the input formats accepted by the VAD.
+ * @return The same list as getNativeSupportedFormat().
+ */
+etk::Vector<enum audio::format> audio::algo::speex::Vad::getSupportedFormat() {
+	etk::Vector<enum audio::format> formats = getNativeSupportedFormat();
+	return formats;
+}
+
+/**
+ * @brief List the formats the algorithm handles without conversion.
+ * @return A list holding only audio::format_int16.
+ */
+etk::Vector<enum audio::format> audio::algo::speex::Vad::getNativeSupportedFormat() {
+	etk::Vector<enum audio::format> formats;
+	formats.pushBack(audio::format_int16);
+	return formats;
+}
+/**
+ * @brief Forward one input frame to the private VAD implementation.
+ * @param[in] _input Input data.
+ * @param[in] _nbChunk Number of sample in the input buffer (10 ms ==> 160 sample).
+ * @note Does nothing (except logging an error) when init() has not been called.
+ */
+void audio::algo::speex::Vad::process(const void* _input, size_t _nbChunk) {
+	if (m_private == null) {
+		AA_SPEEX_ERROR("Algo is not initialized...");
+		// Must stop here: going further would dereference a null pointer.
+		return;
+	}
+	m_private->process(_input, _nbChunk);
+}
+
+/**
+ * @brief Get the preferred number of sample per call to process().
+ * @return The value given by the private implementation, or 32 when the algo is not initialized.
+ */
+int32_t audio::algo::speex::Vad::getOptimalFrameSize() {
+	if (m_private != null) {
+		return m_private->getOptimalFrameSize();
+	}
+	AA_SPEEX_ERROR("Algo is not initialized...");
+	return 32;
+}
+
+
+/**
+ * @brief Get the voice-detection state stored by the private implementation.
+ * @return true if voice was detected, false otherwise (also false when the algo is not initialized).
+ */
+bool audio::algo::speex::Vad::getVoiceDetected() {
+	if (m_private == null) {
+		AA_SPEEX_ERROR("Algo is not initialized...");
+		// Was "return 32" (copy-paste), which converts to true: report "no voice" instead.
+		return false;
+	}
+	return m_private->getVoiceDetected();
+}
--- a/audio/algo/speex/Vad.hpp
+++ b/audio/algo/speex/Vad.hpp
@ -0,0 +1,64 @@
+/** @file
+ * @author Edouard DUPIN 
+ * @copyright 2011, Edouard DUPIN, all right reserved
+ * @license APACHE v2.0 (see license file)
+ */
+#pragma once
+
+#include <etk/types.hpp>
+#include <audio/format.hpp>
+#include <chrono>
+#include <ememory/memory.hpp>
+#include <vector>
+
+namespace audio {
+	namespace algo {
+		namespace speex {
+			class VadPrivate;
+			class Vad {
+				public:
+					Vad() = default;
+					virtual ~Vad() = default;
+				public:
+					/**
+					 * @brief Initialize the Algorithm
+					 * @param[in] _nbChannel Number of channel in the stream.
+					 * @param[in] _sampleRate Sample rate.
+					 * @param[in] _format Input/output data format.
+					 * @note NOTE(review): the default format is audio::format_float whereas
+					 *       getNativeSupportedFormat() only reports audio::format_int16 ==> to confirm.
+					 */
+					virtual void init(int8_t _nbChannel, float _sampleRate, enum audio::format _format = audio::format_float);
+					/**
+					 * @brief Get list of supported sample rates.
+					 * @return list of supported sample rates
+					 */
+					virtual etk::Vector<float> getSupportedSampleRate();
+					/**
+					 * @brief Get list of formats supported in input.
+					 * @return list of supported format
+					 */
+					virtual etk::Vector<enum audio::format> getSupportedFormat();
+					/**
+					 * @brief Get list of formats natively supported by the algorithm (no format conversion).
+					 * @return list of supported format
+					 */
+					virtual etk::Vector<enum audio::format> getNativeSupportedFormat();
+					/**
+					 * @brief Main input algo process.
+					 * @param[in] _input Input data.
+					 * @param[in] _nbChunk Number of chunk in the input buffer.
+					 */
+					virtual void process(const void* _input, size_t _nbChunk);
+					
+					/**
+					 * @brief Get the voice-detection state.
+					 * @return the state stored by the private implementation
+					 */
+					bool getVoiceDetected();
+					
+					/**
+					 * @brief Get the preferred number of chunk per call to process().
+					 * @return the optimal frame size
+					 */
+					int32_t getOptimalFrameSize();
+				protected:
+					ememory::SharedPtr<VadPrivate> m_private; //!< private data.
+			};
+		}
+	}
+}
+
--- a/lutin_audio-algo-speex.py
+++ b/lutin_audio-algo-speex.py
@ -28,15 +28,18 @@ def configure(target, my_module):
 	my_module.add_src_file([
 	    'audio/algo/speex/debug.cpp',
 	    'audio/algo/speex/Resampler.cpp',
-	    'audio/algo/speex/Aec.cpp'
+	    'audio/algo/speex/Aec.cpp',
+	    'audio/algo/speex/Vad.cpp',
 	    ])
 	my_module.add_header_file([
 	    'audio/algo/speex/Resampler.hpp',
-	    'audio/algo/speex/Aec.hpp'
+	    'audio/algo/speex/Aec.hpp',
+	    'audio/algo/speex/Vad.hpp',
 	    ])
 	my_module.add_depend([
 	    'etk',
-	    'audio'
+	    'audio',
+	    'webrtc',
 	    ])
 	my_module.add_optionnal_depend('speex-dsp', ["c++", "-DHAVE_SPEEX_DSP"])
 	my_module.add_path(".")