[DEV] add AEC

This commit is contained in:
Edouard DUPIN 2018-06-19 22:12:46 +02:00
parent 7f3ffb9a12
commit 19956a6211
4 changed files with 452 additions and 9 deletions

200
audio/algo/speex/Aec.cpp Normal file
View File

@ -0,0 +1,200 @@
/** @file
* @author Edouard DUPIN
* @copyright 2011, Edouard DUPIN, all right reserved
* @license APACHE v2.0 (see license file)
*/
#define speex_POSIX 1
#include <audio/algo/speex/Aec.hpp>
#include <audio/algo/speex/debug.hpp>
#include <cmath>
#include <speex/speex_echo.h>
#include <speex/speex_preprocess.h>
namespace audio {
namespace algo {
namespace speex {
/**
 * @brief Private implementation of the Acoustic Echo Canceller, wrapping the
 *        speex echo canceller and (optionally) the speex preprocessor.
 *
 * The instance owns the two speex handles: they are created in the constructor
 * and released in the destructor. Copy is disabled because the handles are raw
 * owning pointers.
 */
class AecPrivate {
	private:
		SpeexEchoState* m_speexEchoState = nullptr; //!< speex echo canceller handle (owned, nullptr when creation failed)
		SpeexPreprocessState* m_speexPreprocessState = nullptr; //!< speex preprocessor handle (owned, nullptr when not used)
		enum audio::format m_format; //!< sample format requested at construction
		float m_sampleRate; //!< sample rate requested at construction
		int8_t m_nbChannel; //!< number of channel requested at construction
		etk::Vector<float> m_tmpBufferIn;
		etk::Vector<float> m_tmpBufferOut;
		int32_t m_frameSize = 32; //!< number of samples consumed by one speex call
		/**
		 * @brief Get the biggest power of 2 that is lower or equal to the number of samples in a duration.
		 * @param[in] _sampleRate Sample rate of the stream.
		 * @param[in] _sizeInMs Duration in milliseconds.
		 * @return The block size in samples (32 when a parameter is out of range).
		 */
		uint32_t calculateBlockSize(uint32_t _sampleRate, uint32_t _sizeInMs) {
			if(_sampleRate < 4000) {
				AA_SPEEX_INFO("speex: request Sample rate < 4000: " << _sampleRate);
				return 32;
			}
			if(_sizeInMs < 1) {
				AA_SPEEX_INFO("speex: request size in ms < 1: " << _sizeInMs);
				return 32;
			}
			const uint32_t nframes = (_sampleRate * _sizeInMs) / 1000;
			// start from 2^30 and go down to the first power of 2 that fits:
			uint32_t y = 1 << ((8 * sizeof(uint32_t)) - 2);
			while (y > nframes) {
				y >>= 1;
			}
			return y;
		}
	public:
		/**
		 * @brief Create the speex echo canceller and its preprocessor.
		 * @param[in] _nbChannel Number of channel in the stream (the preprocessor is only created for 1 channel).
		 * @param[in] _sampleRate Sample rate.
		 * @param[in] _format Input/output data format.
		 */
		AecPrivate(int8_t _nbChannel, float _sampleRate, enum audio::format _format) :
		  m_format(_format),
		  m_sampleRate(_sampleRate),
		  m_nbChannel(_nbChannel) {
			bool paramAGC = false;
			bool paramDenoise = false;
			bool paramEchoSuppress = true;
			int32_t paramEchoSuppressAttenuation = -40;
			int32_t paramEchoSuppressAttenuationActive = -15;
			uint32_t filterSizeMs = 200; // Buffer size in ms [100-500]
			uint32_t frameSizeMs = 20; // Frame size in ms [10..20]
			m_frameSize = calculateBlockSize(static_cast<uint32_t>(m_sampleRate), frameSizeMs);
			AA_SPEEX_INFO("Using framesize: " << m_frameSize);
			// NOTE: the multi-channel variant (speex_echo_state_init_mc) is not used here.
			m_speexEchoState = speex_echo_state_init(m_frameSize, calculateBlockSize(static_cast<uint32_t>(m_sampleRate), filterSizeMs));
			if (m_speexEchoState == nullptr) {
				AA_SPEEX_ERROR("Can not intanciate speex handle");
				return;
			}
			int sampleRateToInject = static_cast<int>(_sampleRate);
			speex_echo_ctl(m_speexEchoState, SPEEX_ECHO_SET_SAMPLING_RATE, &sampleRateToInject);
			if (    paramAGC == true
			     || paramDenoise == true
			     || paramEchoSuppress == true) {
				spx_int32_t tmp;
				if (_nbChannel != 1) {
					AA_SPEEX_ERROR("support only one channel ... ==> need to create multiple channel instance ...");
					return;
				}
				m_speexPreprocessState = speex_preprocess_state_init(m_frameSize, m_sampleRate);
				if (m_speexPreprocessState == nullptr) {
					// Never configure a preprocessor that was not created.
					AA_SPEEX_ERROR("Can not intanciate speex preprocess handle");
					return;
				}
				AA_SPEEX_INFO("Set speex preprocessor AGC=" << paramAGC);
				tmp = paramAGC;
				speex_preprocess_ctl(m_speexPreprocessState, SPEEX_PREPROCESS_SET_AGC, &tmp);
				AA_SPEEX_INFO("Set speex preprocessor DENOISE=" << paramDenoise);
				tmp = paramDenoise;
				speex_preprocess_ctl(m_speexPreprocessState, SPEEX_PREPROCESS_SET_DENOISE, &tmp);
				if (paramEchoSuppress) {
					if (paramEchoSuppressAttenuation) {
						int32_t tmpEchoSuppressAttenuation;
						speex_preprocess_ctl(m_speexPreprocessState, SPEEX_PREPROCESS_GET_ECHO_SUPPRESS, &tmpEchoSuppressAttenuation);
						AA_SPEEX_INFO("Set speex preprocessor AEC attenuation=" << paramEchoSuppressAttenuation << " base=" << tmpEchoSuppressAttenuation);
						speex_preprocess_ctl(m_speexPreprocessState, SPEEX_PREPROCESS_SET_ECHO_SUPPRESS, &paramEchoSuppressAttenuation);
					}
					if (paramEchoSuppressAttenuationActive) {
						int32_t tmpEchoSuppressAttenuationActive;
						speex_preprocess_ctl(m_speexPreprocessState, SPEEX_PREPROCESS_GET_ECHO_SUPPRESS_ACTIVE, &tmpEchoSuppressAttenuationActive);
						AA_SPEEX_INFO("Set speex preprocessor AEC attenuation enable=" << paramEchoSuppressAttenuationActive << " base=" << tmpEchoSuppressAttenuationActive);
						speex_preprocess_ctl(m_speexPreprocessState, SPEEX_PREPROCESS_SET_ECHO_SUPPRESS_ACTIVE, &paramEchoSuppressAttenuationActive);
					}
					// Link the preprocessor to the echo canceller (residual echo suppression).
					speex_preprocess_ctl(m_speexPreprocessState, SPEEX_PREPROCESS_SET_ECHO_STATE, m_speexEchoState);
				}
			} else {
				AA_SPEEX_ERROR("request speex with no algorithm");
			}
		}
		// The speex handles are raw owning pointers: a copy would release them twice.
		AecPrivate(const AecPrivate&) = delete;
		AecPrivate& operator=(const AecPrivate&) = delete;
		/**
		 * @brief Release the speex handles (they were leaked on every re-init before).
		 */
		~AecPrivate() {
			// The preprocessor keeps a reference on the echo state ==> release it first.
			if (m_speexPreprocessState != nullptr) {
				speex_preprocess_state_destroy(m_speexPreprocessState);
				m_speexPreprocessState = nullptr;
			}
			if (m_speexEchoState != nullptr) {
				speex_echo_state_destroy(m_speexEchoState);
				m_speexEchoState = nullptr;
			}
		}
		/**
		 * @brief Main input algo process.
		 *
		 * The buffers are processed frame by frame (see getOptimalFrameSize()).
		 * Samples after the last complete frame are not processed.
		 * @param[in,out] _output Output data.
		 * @param[in] _input Input data.
		 * @param[in] _inputFeedback Input feedback data (all time MONO).
		 * @param[in] _nbChunk Number of chunk in the input buffer (assumed equal to the number of samples: mono stream).
		 */
		void process(void* _output, const void* _input, const void* _inputFeedback, size_t _nbChunk) {
			if (m_speexEchoState == nullptr) {
				AA_SPEEX_ERROR("speex handle is not initialize ==> can not process");
				return;
			}
			if (    _output == nullptr
			     || _input == nullptr
			     || _inputFeedback == nullptr) {
				AA_SPEEX_ERROR("speex: can not process with a null buffer");
				return;
			}
			switch (m_format) {
				case audio::format_int16:
					{
						if (m_frameSize <= 0) {
							AA_SPEEX_ERROR("speex: invalid frame size: " << m_frameSize);
							return;
						}
						const size_t frameSize = static_cast<size_t>(m_frameSize);
						if (_nbChunk < frameSize) {
							// speex always consumes a full frame ==> refuse to read/write out of the buffers.
							AA_SPEEX_ERROR("speex: not enough data to process: " << _nbChunk << " < " << m_frameSize);
							return;
						}
						const spx_int16_t* input = static_cast<const spx_int16_t*>(_input);
						const spx_int16_t* feedback = static_cast<const spx_int16_t*>(_inputFeedback);
						spx_int16_t* output = static_cast<spx_int16_t*>(_output);
						for (size_t offset = 0; offset + frameSize <= _nbChunk; offset += frameSize) {
							// process AEC
							speex_echo_cancellation(m_speexEchoState, input + offset, feedback + offset, output + offset);
							// apply NR after AEC
							if (m_speexPreprocessState != nullptr) {
								speex_preprocess_run(m_speexPreprocessState, output + offset);
							}
						}
						if (_nbChunk % frameSize != 0) {
							AA_SPEEX_ERROR("speex: " << (_nbChunk % frameSize) << " trailing sample(s) not processed (frame size=" << m_frameSize << ")");
						}
					}
					return;
				default:
					AA_SPEEX_ERROR("Can not Limit with unsupported format : " << m_format);
					break;
			}
		}
		/**
		 * @brief Get the number of samples consumed by one processing frame.
		 * @return The frame size in samples.
		 */
		int32_t getOptimalFrameSize() {
			return m_frameSize;
		}
};
}
}
}
void audio::algo::speex::Aec::init(int8_t _nbChannel, float _sampleRate, enum audio::format _format) {
m_private.reset();
m_private = ememory::makeShared<audio::algo::speex::AecPrivate>(_nbChannel, _sampleRate, _format);
}
etk::Vector<float> audio::algo::speex::Aec::getSupportedSampleRate() {
	// Sample rates advertised by this algorithm, in ascending order.
	const float supportedRates[] = {4000, 8000, 16000, 32000, 48000};
	etk::Vector<float> out;
	for (float rate : supportedRates) {
		out.pushBack(rate);
	}
	return out;
}
etk::Vector<enum audio::format> audio::algo::speex::Aec::getSupportedFormat() {
	// No format conversion is done here: the supported list is exactly the native one.
	etk::Vector<enum audio::format> formats = getNativeSupportedFormat();
	return formats;
}
etk::Vector<enum audio::format> audio::algo::speex::Aec::getNativeSupportedFormat() {
	etk::Vector<enum audio::format> nativeFormats;
	// Only the 16 bits integer format is declared (the float format is not enabled).
	nativeFormats.pushBack(audio::format_int16);
	return nativeFormats;
}
/**
 * @brief Forward the processing request to the private implementation.
 * @param[in,out] _output Output data.
 * @param[in] _input Input data.
 * @param[in] _inputFeedback Input feedback data.
 * @param[in] _nbChunk Number of chunk in the input buffer.
 */
void audio::algo::speex::Aec::process(void* _output, const void* _input, const void* _inputFeedback, size_t _nbChunk) {
	if (m_private == nullptr) {
		AA_SPEEX_ERROR("Algo is not initialized...");
		// Without this return the null implementation pointer was dereferenced just below.
		return;
	}
	m_private->process(_output, _input, _inputFeedback, _nbChunk);
}
int32_t audio::algo::speex::Aec::getOptimalFrameSize() {
	const bool initialized = !(m_private == nullptr);
	if (initialized) {
		// Delegate to the private implementation.
		return m_private->getOptimalFrameSize();
	}
	// init() has not been called: report it and return the fallback frame size.
	AA_SPEEX_ERROR("Algo is not initialized...");
	return 32;
}

61
audio/algo/speex/Aec.hpp Normal file
View File

@ -0,0 +1,61 @@
/** @file
* @author Edouard DUPIN
* @copyright 2011, Edouard DUPIN, all right reserved
* @license APACHE v2.0 (see license file)
*/
#pragma once
#include <etk/types.hpp>
#include <audio/format.hpp>
#include <chrono>
#include <ememory/memory.hpp>
#include <vector>
namespace audio {
namespace algo {
namespace speex {
class AecPrivate;
/**
* @brief Acoustic Echo Canceller public interface.
* All the processing is delegated to a private implementation (AecPrivate) created by init().
*/
class Aec {
public:
Aec() = default;
virtual ~Aec() = default;
public:
/**
* @brief Initialize the algorithm (a previous initialization is dropped and replaced).
* @param[in] _nbChannel Number of channel in the stream.
* @param[in] _sampleRate Sample rate.
* @param[in] _format Input/output data format.
* @note NOTE(review): the default value is audio::format_float, but getNativeSupportedFormat() only lists audio::format_int16 -- confirm the intended default.
*/
virtual void init(int8_t _nbChannel, float _sampleRate, enum audio::format _format = audio::format_float);
/**
* @brief Get the list of supported sample rates.
* @return list of supported sample rates
*/
virtual etk::Vector<float> getSupportedSampleRate();
/**
* @brief Get the list of formats supported in input.
* @return list of supported formats
*/
virtual etk::Vector<enum audio::format> getSupportedFormat();
/**
* @brief Get the list of formats natively supported by the algorithm (no format conversion).
* @return list of supported formats
*/
virtual etk::Vector<enum audio::format> getNativeSupportedFormat();
/**
* @brief Main input algo process.
* @param[in,out] _output Output data.
* @param[in] _input Input data.
* @param[in] _inputFeedback Input feedback data (all time MONO).
* @param[in] _nbChunk Number of chunk in the input buffer.
*/
virtual void process(void* _output, const void* _input, const void* _inputFeedback, size_t _nbChunk);
/**
* @brief Get the frame size the algorithm works with.
* @return The frame size of the private implementation, or 32 when the algorithm is not initialized.
*/
int32_t getOptimalFrameSize();
protected:
ememory::SharedPtr<AecPrivate> m_private; //!< private data.
};
}
}
}

View File

@ -27,10 +27,12 @@ def get_version():
def configure(target, my_module):
my_module.add_src_file([
'audio/algo/speex/debug.cpp',
'audio/algo/speex/Resampler.cpp'
'audio/algo/speex/Resampler.cpp',
'audio/algo/speex/Aec.cpp'
])
my_module.add_header_file([
'audio/algo/speex/Resampler.hpp'
'audio/algo/speex/Resampler.hpp',
'audio/algo/speex/Aec.hpp'
])
my_module.add_depend([
'etk',

View File

@ -170,10 +170,71 @@ void performanceResampler() {
}
/**
 * @brief Load one channel of an interleaved audio file as 16 bits integer samples.
 * @param[in] _fileName File to read (the first 44 bytes are skipped when the name ends with ".wav").
 * @param[in] _nbChannel Number of interleaved channels in the file (>= 1).
 * @param[in] _selectChannel Index of the channel to extract (>= 0).
 * @param[in] _formatFileInteger16 true: the file contains int16_t samples, false: it contains float samples.
 * @param[in] _delaySample Number of zero samples inserted before the data.
 * @return The delay samples followed by the samples of the selected channel (float data is scaled by 32768 and saturated).
 */
etk::Vector<int16_t> loadDataI16(etk::String _fileName, int32_t _nbChannel, int32_t _selectChannel, bool _formatFileInteger16, int32_t _delaySample = 0) {
	TEST_INFO("Read : '" << _fileName << "'");
	etk::Vector<int16_t> out;
	if (    _nbChannel < 1
	     || _selectChannel < 0) {
		// A null/negative step would never terminate and a negative channel reads before the buffer.
		TEST_CRITICAL("Invalid channel selection: channel=" << _selectChannel << " nbChannel=" << _nbChannel);
		return out;
	}
	int32_t offset = 0;
	if (etk::end_with(_fileName, ".wav") == true) {
		// remove the first 44 bytes
		offset = 44;
	}
	for (int32_t iii=0; iii<_delaySample; ++iii) {
		out.pushBack(0);
	}
	if (_formatFileInteger16 == true) {
		etk::Vector<int16_t> tmpData = etk::FSNodeReadAllDataType<int16_t>(_fileName, offset);
		const int32_t nbSample = int32_t(tmpData.size());
		// Iterate directly on the selected channel ==> a truncated last frame can not be read out of bounds.
		for (int32_t pos=_selectChannel; pos<nbSample; pos+=_nbChannel) {
			out.pushBack(tmpData[pos]);
		}
	} else {
		etk::Vector<float> tmpData = etk::FSNodeReadAllDataType<float>(_fileName, offset);
		const int32_t nbSample = int32_t(tmpData.size());
		// Iterate directly on the selected channel ==> a truncated last frame can not be read out of bounds.
		for (int32_t pos=_selectChannel; pos<nbSample; pos+=_nbChannel) {
			double val = double(tmpData[pos])*32768.0;
			// saturate on the int16_t range:
			if (val >= 32767.0) {
				out.pushBack(32767);
			} else if (val <= -32768.0) {
				out.pushBack(-32768);
			} else {
				out.pushBack(int16_t(val));
			}
		}
	}
	TEST_INFO("    " << out.size() << " samples");
	return out;
}
/**
 * @brief Load one channel of an interleaved audio file as float samples.
 * @param[in] _fileName File to read (the first 44 bytes are skipped when the name ends with ".wav").
 * @param[in] _nbChannel Number of interleaved channels in the file (>= 1).
 * @param[in] _selectChannel Index of the channel to extract (>= 0).
 * @param[in] _formatFileInteger16 true: the file contains int16_t samples, false: it contains float samples.
 * @param[in] _delaySample Number of zero samples inserted before the data.
 * @return The delay samples followed by the samples of the selected channel (int16_t data is divided by 32768).
 */
etk::Vector<float> loadDataFloat(etk::String _fileName, int32_t _nbChannel, int32_t _selectChannel, bool _formatFileInteger16, int32_t _delaySample = 0) {
	TEST_INFO("Read : '" << _fileName << "'");
	etk::Vector<float> out;
	if (    _nbChannel < 1
	     || _selectChannel < 0) {
		// A null/negative step would never terminate and a negative channel reads before the buffer.
		TEST_CRITICAL("Invalid channel selection: channel=" << _selectChannel << " nbChannel=" << _nbChannel);
		return out;
	}
	int32_t offset = 0;
	if (etk::end_with(_fileName, ".wav") == true) {
		// remove the first 44 bytes
		offset = 44;
	}
	for (int32_t iii=0; iii<_delaySample; ++iii) {
		out.pushBack(0.0);
	}
	if (_formatFileInteger16 == true) {
		etk::Vector<int16_t> tmpData = etk::FSNodeReadAllDataType<int16_t>(_fileName, offset);
		const int32_t nbSample = int32_t(tmpData.size());
		// Iterate directly on the selected channel ==> a truncated last frame can not be read out of bounds.
		for (int32_t pos=_selectChannel; pos<nbSample; pos+=_nbChannel) {
			out.pushBack(double(tmpData[pos])/32768.0);
		}
	} else {
		etk::Vector<float> tmpData = etk::FSNodeReadAllDataType<float>(_fileName, offset);
		const int32_t nbSample = int32_t(tmpData.size());
		// Iterate directly on the selected channel ==> a truncated last frame can not be read out of bounds.
		for (int32_t pos=_selectChannel; pos<nbSample; pos+=_nbChannel) {
			out.pushBack(tmpData[pos]);
		}
	}
	TEST_INFO("    " << out.size() << " samples");
	return out;
}
int main(int _argc, const char** _argv) {
// the only one init for etk:
etk::init(_argc, _argv);
etk::String inputName = "";
etk::String feedbackName = "";
etk::String outputName = "output.raw";
bool performance = false;
bool perf = false;
@ -182,6 +243,14 @@ int main(int _argc, const char** _argv) {
int32_t nbChan = 1;
int32_t quality = 4;
etk::String test = "";
bool formatFileInteger16 = true;
int32_t inputNumberChannel = 1;
int32_t inputSelectChannel = 0;
int32_t inputSampleDelay = 0;
int32_t feedbackNumberChannel = 1;
int32_t feedbackSelectChannel = 0;
int32_t feedbackSampleDelay = 0;
for (int32_t iii=0; iii<_argc ; ++iii) {
etk::String data = _argv[iii];
if (etk::start_with(data,"--in=")) {
@ -195,34 +264,94 @@ int main(int _argc, const char** _argv) {
} else if (etk::start_with(data,"--test=")) {
data = &data[7];
sampleRateIn = etk::string_to_int32_t(data);
} else if (etk::start_with(data,"--format=")) {
if (data == "--format=i16") {
formatFileInteger16 = true;
} else if (data == "--format=float") {
formatFileInteger16 = false;
} else {
TEST_CRITICAL("unsuported format");
}
} else if (etk::start_with(data,"--in-filter=")) {
etk::String tmpData = &data[12];
inputNumberChannel = tmpData.size();
for (int32_t iii = 0; iii< tmpData.size(); ++iii) {
if (tmpData[iii] == '1') {
inputSelectChannel = iii;
TEST_INFO("SELECT input channel : " << inputNumberChannel+1 << " / " << tmpData.size());
break;
}
}
} else if (etk::start_with(data,"--sample-rate-in=")) {
data = &data[17];
sampleRateIn = etk::string_to_int32_t(data);
} else if (etk::start_with(data,"--sample-rate-out=")) {
// ****************************************************
// ** RESAMPLING section
// ****************************************************
} else if ( test == "RESAMPLING"
&& etk::start_with(data,"--sample-rate-out=")) {
data = &data[18];
sampleRateOut = etk::string_to_int32_t(data);
} else if (etk::start_with(data,"--nb=")) {
} else if ( test == "RESAMPLING"
&& etk::start_with(data,"--nb=")) {
data = &data[5];
nbChan = etk::string_to_int32_t(data);
} else if (etk::start_with(data,"--quality=")) {
} else if ( test == "RESAMPLING"
&& etk::start_with(data,"--quality=")) {
data = &data[10];
quality = etk::string_to_int32_t(data);
// ****************************************************
// ** AEC section
// ****************************************************
} else if ( test == "AEC"
&& etk::start_with(data,"--fb-filter=")) {
etk::String tmpData = &data[12];
feedbackNumberChannel = tmpData.size();
for (int32_t iii = 0; iii< tmpData.size(); ++iii) {
if (tmpData[iii] == '1') {
feedbackSelectChannel = iii;
TEST_INFO("SELECT FB channel : " << feedbackSelectChannel+1 << " / " << tmpData.size());
break;
}
}
} else if ( test == "AEC"
&& etk::start_with(data,"--fb=")) {
feedbackName = &data[5];
} else if ( test == "AEC"
&& etk::start_with(data,"--fb-delay=")) {
data = &data[11];
feedbackSampleDelay = etk::string_to_int32_t(data);
} else if ( test == "AEC"
&& etk::start_with(data,"--in-delay=")) {
data = &data[11];
inputSampleDelay = etk::string_to_int32_t(data);
} else if ( data == "-h"
|| data == "--help") {
TEST_PRINT("Help : ");
TEST_PRINT(" ./xxx --fb=file.raw --mic=file.raw");
TEST_PRINT(" --in=YYY.raw input file");
TEST_PRINT(" --in-filter=xxx Select the channel desired in the input stream (n*0 for each channel and 1 for the selected one. ex: 4 channel, secect the third==> 0010) [default 1]");
TEST_PRINT(" --sample-rate-in=XXXX Input signal sample rate (default 48000)");
TEST_PRINT(" --out=zzz.raw output file");
TEST_PRINT(" --format=xxx file Format : i16/float (default i16)");
TEST_PRINT(" --performance Generate signal to force algo to maximum process time");
TEST_PRINT(" --perf Enable performence test (little slower but real performence test)");
TEST_PRINT(" --test=XXXX some test availlable ...");
TEST_PRINT(" RESAMPLING Test resampling data 16 bit mode");
TEST_PRINT(" --sample-rate-in=XXXX Input signal sample rate (default 48000)");
TEST_PRINT(" --sample-rate-out=XXXX Output signal sample rate (default 48000)");
TEST_PRINT(" --quality=XX Resampling quality [0..10] (default 4)");
TEST_PRINT(" --nb=XX Number of channel in the file (default 1)");
TEST_PRINT(" AEC Test AEC (SPEEX AEC is in 16 bits)");
TEST_PRINT(" --fb=XXXX.raw Input Feedback file");
TEST_PRINT(" --fb-filter=xxx Select the chanel desired in the input stream (same as --in-filter)");
TEST_PRINT(" --fb-delay=xxx dalay in sample in the signal feedback (default 0)");
TEST_PRINT(" --in-delay=xxx dalay in sample in the signal input (default 0)");
TEST_PRINT(" example: ");
TEST_PRINT(" ./XXX --test=AEC --fb=aaa_input.wav --in=aaa_input.wav --in-sample-rate=16000 --fb-filter=01 --in-filter=10 --format=i16 --in-delay=64");
exit(0);
} else {
TEST_CRITICAL("unknow parameter : '" << data << "'");
}
}
// PERFORMANCE test only ....
@ -237,7 +366,7 @@ int main(int _argc, const char** _argv) {
exit(-1);
}
TEST_INFO("Read input:");
etk::Vector<int16_t> inputData = etk::FSNodeReadAllDataType<int16_t>(inputName);
etk::Vector<int16_t> inputData = loadDataI16(inputName, inputNumberChannel, inputSelectChannel, formatFileInteger16, inputSampleDelay);
TEST_INFO(" " << inputData.size() << " samples");
// resize output :
etk::Vector<int16_t> output;
@ -262,7 +391,7 @@ int main(int _argc, const char** _argv) {
algo.process(&output[outputPosition], availlableSize, &inputData[iii*blockSize], blockSize);
if (perf == true) {
perfo.toc();
ethread::sleepMilliSeconds((1));
ethread::sleepMilliSeconds(1);
}
outputPosition += availlableSize*nbChan;
}
@ -277,8 +406,59 @@ int main(int _argc, const char** _argv) {
TEST_INFO(" max=" << (float((perfo.getMaxProcessing().get()*sampleRateIn)/blockSize)/1000000000.0)*100.0 << " %");
TEST_INFO(" avg=" << (float(((perfo.getTotalTimeProcessing().get()/perfo.getTotalIteration())*sampleRateIn)/blockSize)/1000000000.0)*100.0 << " %");
}
TEST_PRINT("Store in file : '" << outputName << "' size = " << output.size());
etk::FSNodeWriteAllDataType<int16_t>(outputName, output);
///////////////////////////////////////////////////////////////////////////////////////////////////////////
} else if (test == "AEC") {
// process in chunk of XXX samples represent 10 ms of DATA ==> this is webRTC ...
int32_t blockSize = 32;
etk::Vector<int16_t> inputData = loadDataI16(inputName, inputNumberChannel, inputSelectChannel, formatFileInteger16, inputSampleDelay);
etk::Vector<int16_t> feedbackData = loadDataI16(feedbackName, feedbackNumberChannel, feedbackSelectChannel, formatFileInteger16, feedbackSampleDelay);
//etk::FSNodeWriteAllDataType<int16_t>("bbb_input_I16_1c.raw", inputData);
//etk::FSNodeWriteAllDataType<int16_t>("bbb_feedback_I16_1c.raw", feedbackData);
// resize output :
etk::Vector<int16_t> output;
output.resize(inputData.size(), 0);
Performance perfo;
{
audio::algo::speex::Aec algo;
algo.init(1, sampleRateIn, audio::format_int16);
blockSize = algo.getOptimalFrameSize();
int32_t lastPourcent = -1;
for (int32_t iii=0; iii<output.size()/blockSize; ++iii) {
if (lastPourcent != 100*iii / (output.size()/blockSize)) {
lastPourcent = 100*iii / (output.size()/blockSize);
TEST_INFO("Process : " << iii*blockSize << "/" << int32_t(output.size()/blockSize)*blockSize << " " << lastPourcent << "/100");
} else {
TEST_VERBOSE("Process : " << iii*blockSize << "/" << int32_t(output.size()/blockSize)*blockSize);
}
perfo.tic();
algo.process(&output[iii*blockSize], &inputData[iii*blockSize], &feedbackData[iii*blockSize], blockSize);
if (perf == true) {
perfo.toc();
ethread::sleepMilliSeconds(1);
}
}
}
TEST_PRINT("Process done");
if (perf == true) {
TEST_PRINT("Performance Result: ");
TEST_INFO(" blockSize=" << blockSize << " sample");
TEST_INFO(" min < avg < max =" << perfo.getMinProcessing().count() << "ns < "
<< perfo.getTotalTimeProcessing().count()/perfo.getTotalIteration() << "ns < "
<< perfo.getMaxProcessing().count() << "ns ");
float avg = (float(((perfo.getTotalTimeProcessing().count()/perfo.getTotalIteration())*sampleRateIn)/double(blockSize))/1000000000.0)*100.0;
TEST_INFO(" min < avg < max= " << (float((perfo.getMinProcessing().count()*sampleRateIn)/double(blockSize))/1000000000.0)*100.0 << "% < "
<< avg << "% < "
<< (float((perfo.getMaxProcessing().count()*sampleRateIn)/double(blockSize))/1000000000.0)*100.0 << "%");
TEST_PRINT("float : " << sampleRateIn << " : " << avg << "%");
}
TEST_PRINT("Store in file : '" << outputName << "' size = " << output.size());
etk::FSNodeWriteAllDataType<int16_t>(outputName, output);
}
TEST_PRINT(" ***************************************");
TEST_PRINT(" ** APPLICATION FINISHED OK **");
TEST_PRINT(" ***************************************");
}