[DEV] add a simple VAD with speex library

2019-04-01 22:09:41 +02:00 · 2019-04-01 22:09:41 +02:00 · 4b04365f3f
commit 4b04365f3f
parent 44911d3616
4 changed files with 187 additions and 4 deletions
--- a/audio/algo/speex/Resampler.cpp
+++ b/audio/algo/speex/Resampler.cpp
@ -78,6 +78,7 @@ namespace audio {
 								break;
 							case audio::format_float:
 								{
 									AA_SPEEX_ERROR("RESAMPLE: " << _nbChunk << " ==> " << _nbChunkOut);
 									uint32_t nbChunkInput = _nbChunk;
 									uint32_t nbChunkOutput = _nbChunkOut;
 									int ret = speex_resampler_process_interleaved_float(m_speexResampler,
@ -85,6 +86,7 @@ namespace audio {
 									                                                    &nbChunkInput,
 									                                                    reinterpret_cast<float*>(_output),
 									                                                    &nbChunkOutput);
 									AA_SPEEX_ERROR("RESAMPLE: " << nbChunkInput << " ==> " << nbChunkOutput << " DONE");
 									// Check all input and output ...
 									if (nbChunkInput != _nbChunk) {
 										AA_SPEEX_ERROR("inputSize (not all read ...) proceed=" << nbChunkInput << " requested=" << _nbChunk);
@ -132,7 +134,7 @@ etk::Vector<enum audio::format> audio::algo::speex::Resampler::getSupportedForma
 etk::Vector<enum audio::format> audio::algo::speex::Resampler::getNativeSupportedFormat() {
 	etk::Vector<enum audio::format> out;
-	out.pushBack(audio::format_float);
+	//out.pushBack(audio::format_float); ==> speex-dsp is compiled either in fixed-point or in float ... not both at the same time ...
 	out.pushBack(audio::format_int16);
 	return out;
 }
--- a/audio/algo/speex/Vad.cpp
+++ b/audio/algo/speex/Vad.cpp
@ -0,0 +1,114 @@
 /** @file
 * @author Edouard DUPIN 
 * @copyright 2011, Edouard DUPIN, all right reserved
 * @license APACHE v2.0 (see license file)
 */
 #define speex_POSIX 1
 #include <audio/algo/speex/Vad.hpp>
 #include <audio/algo/speex/debug.hpp>
 #include <cmath>
 #include <webrtc/common_audio/vad/include/webrtc_vad.h>
 namespace audio {
 	namespace algo {
 		namespace speex {
 			class VadPrivate {
 				private:
 					VadInst *m_vad = null;
 					bool m_voiceDetected = false;
 					uint32_t calculateBlockSize(uint32_t _sampleRate, uint32_t _sizeInMs) {
 						return 0;
 					}
 				public:
 					VadPrivate() {
 						WebRtcVad_Create(&m_vad);
 						WebRtcVad_Init(m_vad);
 					}
 				~VadPrivate() {
 					/* TODO : Check this leak ...
 					if (m_speexPreprocessState) {
 						speex_preprocess_state_destroy(m_speexPreprocessState);
 						m_speexPreprocessState = null;
 					}
 					if (m_speexEchoState) {
 						speex_echo_state_destroy(m_speexEchoState);
 						m_speexEchoState = null;
 					}
 					*/
 				}
 				/**
 				 * @brief Main input algo process.
 				 * @param[in,out] _output Output data.
 				 * @param[in] _input Input data.
 				 * @param[in] _inputFeedback Input feedback data (all time MONO).
 				 * @param[in] _nbChunk Number of chunk in the input buffer.
 				 * @param[in] _nbChannel Number of channel in the stream.
 				 */
 				void process(const void* _input, size_t _nbChunk) {
 					if (m_vad == null) {
 						AA_SPEEX_ERROR("speex handle is not initialize ==> can not process");
 						return;
 					}
 					m_voiceDetected = WebRtcVad_Process(m_vad, 16000, (const int16_t*)_input, _nbChunk);
 				}
 				int32_t getOptimalFrameSize() {
 					return 160;
 				}
 				bool getVoiceDetected() {
 					return m_voiceDetected;
 				}
 			};
 		}
 	}
 }
 void audio::algo::speex::Vad::init(int8_t _nbChannel, float _sampleRate, enum audio::format _format) {
 	m_private.reset();
 	m_private = ememory::makeShared<audio::algo::speex::VadPrivate>();
 }
 /**
  * @brief Get the list of sample rates supported by the VAD.
  * @return A list holding the single value 16000.
  */
 etk::Vector<float> audio::algo::speex::Vad::getSupportedSampleRate() {
 	etk::Vector<float> rates;
 	rates.pushBack(16000);
 	return rates;
 }
 /**
  * @brief Get the list of formats supported in input.
  * @return The same list as getNativeSupportedFormat().
  */
 etk::Vector<enum audio::format> audio::algo::speex::Vad::getSupportedFormat() {
 	etk::Vector<enum audio::format> formats = getNativeSupportedFormat();
 	return formats;
 }
 /**
  * @brief Get the list of formats handled without any conversion.
  * @return A list holding only audio::format_int16.
  */
 etk::Vector<enum audio::format> audio::algo::speex::Vad::getNativeSupportedFormat() {
 	etk::Vector<enum audio::format> formats;
 	const enum audio::format nativeFormat = audio::format_int16;
 	formats.pushBack(nativeFormat);
 	return formats;
 }
 /**
  * @brief Forward one block of samples to the private back-end.
  * The optimal block is 10 ms ==> 160 samples.
  * @param[in] _input Input data.
  * @param[in] _nbChunk Number of chunk in the input buffer.
  */
 void audio::algo::speex::Vad::process(const void* _input, size_t _nbChunk) {
 	if (m_private == null) {
 		AA_SPEEX_ERROR("Algo is not initialized...");
 		// Nothing to forward to: going on would dereference a null pointer.
 		return;
 	}
 	m_private->process(_input, _nbChunk);
 }
 /**
  * @brief Get the preferred number of samples per call to process().
  * @return The value given by the private back-end, or 32 (with an error log)
  *         when the algorithm is not initialized.
  */
 int32_t audio::algo::speex::Vad::getOptimalFrameSize() {
 	if (m_private != null) {
 		return m_private->getOptimalFrameSize();
 	}
 	AA_SPEEX_ERROR("Algo is not initialized...");
 	return 32;
 }
 /**
  * @brief Get the detection result stored by the last call to process().
  * @return true if voice was detected; false otherwise, or when the
  *         algorithm is not initialized (an error is logged in that case).
  */
 bool audio::algo::speex::Vad::getVoiceDetected() {
 	if (m_private == null) {
 		AA_SPEEX_ERROR("Algo is not initialized...");
 		// Without a back-end nothing was analysed: do not report voice.
 		return false;
 	}
 	return m_private->getVoiceDetected();
 }
--- a/audio/algo/speex/Vad.hpp
+++ b/audio/algo/speex/Vad.hpp
@ -0,0 +1,64 @@
 /** @file
 * @author Edouard DUPIN 
 * @copyright 2011, Edouard DUPIN, all right reserved
 * @license APACHE v2.0 (see license file)
 */
 #pragma once
 #include <etk/types.hpp>
 #include <audio/format.hpp>
 #include <chrono>
 #include <ememory/memory.hpp>
 #include <vector>
 namespace audio {
 	namespace algo {
 		namespace speex {
 			class VadPrivate;
 			/**
 			 * @brief Voice Activity Detection front-end.
 			 * All the work is delegated to a private VadPrivate object that is built by init().
 			 */
 			class Vad {
 				public:
 					Vad() = default;
 					virtual ~Vad() = default;
 				public:
 					/**
 					 * @brief Initialize the Algorithm
 					 * @param[in] _nbChannel Number of channel in the stream.
 					 * @param[in] _sampleRate Sample rate.
 					 * @param[in] _format Input data format.
 					 * @note NOTE(review): the implementation in Vad.cpp does not read any of these
 					 *       parameters, and audio::format_float is not in the supported list ==> confirm the default.
 					 */
 					virtual void init(int8_t _nbChannel, float _sampleRate, enum audio::format _format = audio::format_float);
 					/**
 					 * @brief Get list of samplerate supported.
 					 * @return list of supported samplerate
 					 */
 					virtual etk::Vector<float> getSupportedSampleRate();
 					/**
 					 * @brief Get list of format supported in input.
 					 * @return list of supported format
 					 */
 					virtual etk::Vector<enum audio::format> getSupportedFormat();
 					/**
 					 * @brief Get list of algorithm format supported. No format conversion.
 					 * @return list of supported format
 					 */
 					virtual etk::Vector<enum audio::format> getNativeSupportedFormat();
 					/**
 					 * @brief Main input algo process.
 					 * @param[in] _input Input data.
 					 * @param[in] _nbChunk Number of chunk in the input buffer.
 					 */
 					virtual void process(const void* _input, size_t _nbChunk);
 					/**
 					 * @brief Get the detection result of the last processed block.
 					 * @return true if voice was detected.
 					 */
 					bool getVoiceDetected();
 					/**
 					 * @brief Get the preferred number of samples to give to each call of process().
 					 * @return The optimal frame size.
 					 */
 					int32_t getOptimalFrameSize();
 				protected:
 					ememory::SharedPtr<VadPrivate> m_private; //!< Private implementation (created by init()).
 			};
 		}
 	}
 }
--- a/lutin_audio-algo-speex.py
+++ b/lutin_audio-algo-speex.py
@ -28,15 +28,18 @@ def configure(target, my_module):
 	my_module.add_src_file([
 	    'audio/algo/speex/debug.cpp',
 	    'audio/algo/speex/Resampler.cpp',
-	    'audio/algo/speex/Aec.cpp'
+	    'audio/algo/speex/Aec.cpp',
 	    'audio/algo/speex/Vad.cpp',
 	    ])
 	my_module.add_header_file([
 	    'audio/algo/speex/Resampler.hpp',
-	    'audio/algo/speex/Aec.hpp'
+	    'audio/algo/speex/Aec.hpp',
 	    'audio/algo/speex/Vad.hpp',
 	    ])
 	my_module.add_depend([
 	    'etk',
-	    'audio'
+	    'audio',
 	    'webrtc',
 	    ])
 	my_module.add_optionnal_depend('speex-dsp', ["c++", "-DHAVE_SPEEX_DSP"])
 	my_module.add_path(".")