Add 48kHz support to AGC

Doing the same for the 16-24kHz band as was done for the 8-16kHz band.
Results look and sound just as nice.

Originally reviewed here:
https://webrtc-codereview.appspot.com/26339004/

BUG=webrtc:3146
R=andrew@webrtc.org, kwiberg@webrtc.org

Review URL: https://webrtc-codereview.appspot.com/28299004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@7917 4adac7df-926f-26a2-2b94-8c16560cd09d
This commit is contained in:
aluebs@webrtc.org 2014-12-16 20:56:09 +00:00
parent 2510d11c0f
commit cf6d0b64ef
5 changed files with 93 additions and 142 deletions

View File

@ -112,7 +112,7 @@ static const int32_t kTargetLevelTable[64] = {134209536, 106606424, 84680493, 67
6726, 5343, 4244, 3371, 2678, 2127, 1690, 1342, 1066, 847, 673, 534, 424, 337, 268,
213, 169, 134, 107, 85, 67};
int WebRtcAgc_AddMic(void *state, int16_t *in_mic, int16_t *in_mic_H,
int WebRtcAgc_AddMic(void *state, int16_t* const* in_mic, int16_t num_bands,
int16_t samples)
{
int32_t nrg, max_nrg, sample, tmp32;
@ -134,17 +134,6 @@ int WebRtcAgc_AddMic(void *state, int16_t *in_mic, int16_t *in_mic_H,
}
}
/* Check for valid pointers based on sampling rate */
if ((stt->fs == 32000) && (in_mic_H == NULL))
{
return -1;
}
/* Check for valid pointer for low band */
if (in_mic == NULL)
{
return -1;
}
/* apply slowly varying digital gain */
if (stt->micVol > stt->maxAnalog)
{
@ -175,32 +164,19 @@ int WebRtcAgc_AddMic(void *state, int16_t *in_mic, int16_t *in_mic_H,
for (i = 0; i < samples; i++)
{
// For lower band
sample = (in_mic[i] * gain) >> 12;
if (sample > 32767)
int j;
for (j = 0; j < num_bands; ++j)
{
in_mic[i] = 32767;
} else if (sample < -32768)
{
in_mic[i] = -32768;
} else
{
in_mic[i] = (int16_t)sample;
}
// For higher band
if (stt->fs == 32000)
{
sample = (in_mic_H[i] * gain) >> 12;
sample = (in_mic[j][i] * gain) >> 12;
if (sample > 32767)
{
in_mic_H[i] = 32767;
in_mic[j][i] = 32767;
} else if (sample < -32768)
{
in_mic_H[i] = -32768;
in_mic[j][i] = -32768;
} else
{
in_mic_H[i] = (int16_t)sample;
in_mic[j][i] = (int16_t)sample;
}
}
}
@ -224,7 +200,8 @@ int WebRtcAgc_AddMic(void *state, int16_t *in_mic, int16_t *in_mic_H,
max_nrg = 0;
for (n = 0; n < L; n++)
{
nrg = WEBRTC_SPL_MUL_16_16(in_mic[i * L + n], in_mic[i * L + n]);
nrg = WEBRTC_SPL_MUL_16_16(in_mic[0][i * L + n],
in_mic[0][i * L + n]);
if (nrg > max_nrg)
{
max_nrg = nrg;
@ -246,10 +223,13 @@ int WebRtcAgc_AddMic(void *state, int16_t *in_mic, int16_t *in_mic_H,
{
if (stt->fs == 16000)
{
WebRtcSpl_DownsampleBy2(&in_mic[i * 32], 32, tmp_speech, stt->filterState);
WebRtcSpl_DownsampleBy2(&in_mic[0][i * 32],
32,
tmp_speech,
stt->filterState);
} else
{
memcpy(tmp_speech, &in_mic[i * 16], 16 * sizeof(short));
memcpy(tmp_speech, &in_mic[0][i * 16], 16 * sizeof(short));
}
/* Compute energy in blocks of 16 samples */
ptr[i] = WebRtcSpl_DotProductWithScale(tmp_speech, tmp_speech, 16, 4);
@ -265,7 +245,7 @@ int WebRtcAgc_AddMic(void *state, int16_t *in_mic, int16_t *in_mic_H,
}
/* call VAD (use low band only) */
WebRtcAgc_ProcessVad(&stt->vadMic, in_mic, samples);
WebRtcAgc_ProcessVad(&stt->vadMic, in_mic[0], samples);
return 0;
}
@ -286,7 +266,7 @@ int WebRtcAgc_AddFarend(void *state, const int16_t *in_far, int16_t samples)
{
return -1;
}
} else if (stt->fs == 16000 || stt->fs == 32000)
} else if (stt->fs == 16000 || stt->fs == 32000 || stt->fs == 48000)
{
if (samples != 160)
{
@ -300,13 +280,13 @@ int WebRtcAgc_AddFarend(void *state, const int16_t *in_far, int16_t samples)
return WebRtcAgc_AddFarendToDigital(&stt->digitalAgc, in_far, samples);
}
int WebRtcAgc_VirtualMic(void *agcInst, int16_t *in_near, int16_t *in_near_H,
int16_t samples, int32_t micLevelIn,
int WebRtcAgc_VirtualMic(void *agcInst, int16_t* const* in_near,
int16_t num_bands, int16_t samples, int32_t micLevelIn,
int32_t *micLevelOut)
{
int32_t tmpFlt, micLevelTmp, gainIdx;
uint16_t gain;
int16_t ii;
int16_t ii, j;
Agc_t *stt;
uint32_t nrg;
@ -329,7 +309,7 @@ int WebRtcAgc_VirtualMic(void *agcInst, int16_t *in_near, int16_t *in_near_H,
frameNrgLimit = frameNrgLimit << 1;
}
frameNrg = WEBRTC_SPL_MUL_16_16(in_near[0], in_near[0]);
frameNrg = WEBRTC_SPL_MUL_16_16(in_near[0][0], in_near[0][0]);
for (sampleCntr = 1; sampleCntr < samples; sampleCntr++)
{
@ -337,12 +317,14 @@ int WebRtcAgc_VirtualMic(void *agcInst, int16_t *in_near, int16_t *in_near_H,
// the correct value of the energy is not important
if (frameNrg < frameNrgLimit)
{
nrg = WEBRTC_SPL_MUL_16_16(in_near[sampleCntr], in_near[sampleCntr]);
nrg = WEBRTC_SPL_MUL_16_16(in_near[0][sampleCntr],
in_near[0][sampleCntr]);
frameNrg += nrg;
}
// Count the zero crossings
numZeroCrossing += ((in_near[sampleCntr] ^ in_near[sampleCntr - 1]) < 0);
numZeroCrossing +=
((in_near[0][sampleCntr] ^ in_near[0][sampleCntr - 1]) < 0);
}
if ((frameNrg < 500) || (numZeroCrossing <= 5))
@ -389,7 +371,7 @@ int WebRtcAgc_VirtualMic(void *agcInst, int16_t *in_near, int16_t *in_near_H,
}
for (ii = 0; ii < samples; ii++)
{
tmpFlt = (in_near[ii] * gain) >> 10;
tmpFlt = (in_near[0][ii] * gain) >> 10;
if (tmpFlt > 32767)
{
tmpFlt = 32767;
@ -414,10 +396,10 @@ int WebRtcAgc_VirtualMic(void *agcInst, int16_t *in_near, int16_t *in_near_H,
gain = kSuppressionTableVirtualMic[127 - gainIdx];
}
}
in_near[ii] = (int16_t)tmpFlt;
if (stt->fs == 32000)
in_near[0][ii] = (int16_t)tmpFlt;
for (j = 1; j < num_bands; ++j)
{
tmpFlt = (in_near_H[ii] * gain) >> 10;
tmpFlt = (in_near[j][ii] * gain) >> 10;
if (tmpFlt > 32767)
{
tmpFlt = 32767;
@ -426,7 +408,7 @@ int WebRtcAgc_VirtualMic(void *agcInst, int16_t *in_near, int16_t *in_near_H,
{
tmpFlt = -32768;
}
in_near_H[ii] = (int16_t)tmpFlt;
in_near[j][ii] = (int16_t)tmpFlt;
}
}
/* Set the level we (finally) used */
@ -434,7 +416,7 @@ int WebRtcAgc_VirtualMic(void *agcInst, int16_t *in_near, int16_t *in_near_H,
// *micLevelOut = stt->micGainIdx;
*micLevelOut = stt->micGainIdx >> stt->scale;
/* Add to Mic as if it was the output from a true microphone */
if (WebRtcAgc_AddMic(agcInst, in_near, in_near_H, samples) != 0)
if (WebRtcAgc_AddMic(agcInst, in_near, num_bands, samples) != 0)
{
return -1;
}
@ -1158,9 +1140,9 @@ int32_t WebRtcAgc_ProcessAnalog(void *state, int32_t inMicLevel,
return 0;
}
int WebRtcAgc_Process(void *agcInst, const int16_t *in_near,
const int16_t *in_near_H, int16_t samples,
int16_t *out, int16_t *out_H, int32_t inMicLevel,
int WebRtcAgc_Process(void *agcInst, const int16_t* const* in_near,
int16_t num_bands, int16_t samples,
int16_t* const* out, int32_t inMicLevel,
int32_t *outMicLevel, int16_t echo,
uint8_t *saturationWarning)
{
@ -1182,7 +1164,7 @@ int WebRtcAgc_Process(void *agcInst, const int16_t *in_near,
{
return -1;
}
} else if (stt->fs == 16000 || stt->fs == 32000)
} else if (stt->fs == 16000 || stt->fs == 32000 || stt->fs == 48000)
{
if (samples != 160)
{
@ -1193,17 +1175,6 @@ int WebRtcAgc_Process(void *agcInst, const int16_t *in_near,
return -1;
}
/* Check for valid pointers based on sampling rate */
if (stt->fs == 32000 && in_near_H == NULL)
{
return -1;
}
/* Check for valid pointers for low band */
if (in_near == NULL)
{
return -1;
}
*saturationWarning = 0;
//TODO: PUT IN RANGE CHECKING FOR INPUT LEVELS
*outMicLevel = inMicLevel;
@ -1214,9 +1185,8 @@ int WebRtcAgc_Process(void *agcInst, const int16_t *in_near,
if (WebRtcAgc_ProcessDigital(&stt->digitalAgc,
in_near,
in_near_H,
num_bands,
out,
out_H,
stt->fs,
stt->lowLevelSignal) == -1)
{

View File

@ -293,9 +293,11 @@ int32_t WebRtcAgc_AddFarendToDigital(DigitalAgc_t *stt, const int16_t *in_far,
return 0;
}
int32_t WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const int16_t *in_near,
const int16_t *in_near_H, int16_t *out,
int16_t *out_H, uint32_t FS,
int32_t WebRtcAgc_ProcessDigital(DigitalAgc_t *stt,
const int16_t* const* in_near,
int16_t num_bands,
int16_t* const* out,
uint32_t FS,
int16_t lowlevelSignal)
{
// array for gains (one value per ms, incl start & end)
@ -303,7 +305,7 @@ int32_t WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const int16_t *in_near,
int32_t out_tmp, tmp32;
int32_t env[10];
int32_t nrg, max_nrg;
int32_t max_nrg;
int32_t cur_level;
int32_t gain32, delta;
int16_t logratio;
@ -311,7 +313,7 @@ int32_t WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const int16_t *in_near,
int16_t zeros = 0, zeros_fast, frac = 0;
int16_t decay;
int16_t gate, gain_adj;
int16_t k, n;
int16_t k, n, i;
int16_t L, L2; // samples/subframe
// determine number of samples per ms
@ -319,7 +321,7 @@ int32_t WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const int16_t *in_near,
{
L = 8;
L2 = 3;
} else if (FS == 16000 || FS == 32000)
} else if (FS == 16000 || FS == 32000 || FS == 48000)
{
L = 16;
L2 = 4;
@ -328,20 +330,16 @@ int32_t WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const int16_t *in_near,
return -1;
}
if (in_near != out)
for (i = 0; i < num_bands; ++i)
{
// Only needed if they don't already point to the same place.
memcpy(out, in_near, 10 * L * sizeof(int16_t));
}
if (FS == 32000)
{
if (in_near_H != out_H)
if (in_near[i] != out[i])
{
memcpy(out_H, in_near_H, 10 * L * sizeof(int16_t));
// Only needed if they don't already point to the same place.
memcpy(out[i], in_near[i], 10 * L * sizeof(in_near[i][0]));
}
}
// VAD for near end
logratio = WebRtcAgc_ProcessVad(&stt->vadNearend, out, L * 10);
logratio = WebRtcAgc_ProcessVad(&stt->vadNearend, out[0], L * 10);
// Account for far end VAD
if (stt->vadFarend.counter > 10)
@ -407,7 +405,8 @@ int32_t WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const int16_t *in_near,
max_nrg = 0;
for (n = 0; n < L; n++)
{
nrg = WEBRTC_SPL_MUL_16_16(out[k * L + n], out[k * L + n]);
int32_t nrg = WEBRTC_SPL_MUL_16_16(out[0][k * L + n],
out[0][k * L + n]);
if (nrg > max_nrg)
{
max_nrg = nrg;
@ -568,35 +567,20 @@ int32_t WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const int16_t *in_near,
// iterate over samples
for (n = 0; n < L; n++)
{
// For lower band
tmp32 = out[n] * ((gain32 + 127) >> 7);
out_tmp = tmp32 >> 16;
if (out_tmp > 4095)
for (i = 0; i < num_bands; ++i)
{
out[n] = (int16_t)32767;
} else if (out_tmp < -4096)
{
out[n] = (int16_t)-32768;
} else
{
tmp32 = out[n] * (gain32 >> 4);
out[n] = (int16_t)(tmp32 >> 16);
}
// For higher band
if (FS == 32000)
{
tmp32 = out_H[n] * ((gain32 + 127) >> 7);
tmp32 = out[i][n] * ((gain32 + 127) >> 7);
out_tmp = tmp32 >> 16;
if (out_tmp > 4095)
{
out_H[n] = (int16_t)32767;
out[i][n] = (int16_t)32767;
} else if (out_tmp < -4096)
{
out_H[n] = (int16_t)-32768;
out[i][n] = (int16_t)-32768;
} else
{
tmp32 = out_H[n] * (gain32 >> 4);
out_H[n] = (int16_t)(tmp32 >> 16);
tmp32 = out[i][n] * (gain32 >> 4);
out[i][n] = (int16_t)(tmp32 >> 16);
}
}
//
@ -611,14 +595,10 @@ int32_t WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const int16_t *in_near,
// iterate over samples
for (n = 0; n < L; n++)
{
// For lower band
tmp32 = out[k * L + n] * (gain32 >> 4);
out[k * L + n] = (int16_t)(tmp32 >> 16);
// For higher band
if (FS == 32000)
for (i = 0; i < num_bands; ++i)
{
tmp32 = out_H[k * L + n] * (gain32 >> 4);
out_H[k * L + n] = (int16_t)(tmp32 >> 16);
tmp32 = out[i][k * L + n] * (gain32 >> 4);
out[i][k * L + n] = (int16_t)(tmp32 >> 16);
}
gain32 += delta;
}

View File

@ -55,8 +55,10 @@ typedef struct
int32_t WebRtcAgc_InitDigital(DigitalAgc_t *digitalAgcInst, int16_t agcMode);
int32_t WebRtcAgc_ProcessDigital(DigitalAgc_t *digitalAgcInst,
const int16_t *inNear, const int16_t *inNear_H,
int16_t *out, int16_t *out_H, uint32_t FS,
const int16_t* const* inNear,
int16_t num_bands,
int16_t* const* out,
uint32_t FS,
int16_t lowLevelSignal);
int32_t WebRtcAgc_AddFarendToDigital(DigitalAgc_t *digitalAgcInst,

View File

@ -52,7 +52,8 @@ extern "C"
/*
* This function processes a 10 ms frame of far-end speech to determine
* if there is active speech. The length of the input speech vector must be
* given in samples (80 when FS=8000, and 160 when FS=16000 or FS=32000).
* given in samples (80 when FS=8000, and 160 when FS=16000, FS=32000 or
* FS=48000).
*
* Input:
* - agcInst : AGC instance.
@ -70,17 +71,17 @@ int WebRtcAgc_AddFarend(void* agcInst,
/*
* This function processes a 10 ms frame of microphone speech to determine
* if there is active speech. The length of the input speech vector must be
* given in samples (80 when FS=8000, and 160 when FS=16000 or FS=32000). For
* very low input levels, the input signal is increased in level by multiplying
* and overwriting the samples in inMic[].
* given in samples (80 when FS=8000, and 160 when FS=16000, FS=32000 or
* FS=48000). For very low input levels, the input signal is increased in level
* by multiplying and overwriting the samples in inMic[].
*
* This function should be called before any further processing of the
* near-end microphone signal.
*
* Input:
* - agcInst : AGC instance.
* - inMic : Microphone input speech vector for L band
* - inMic_H : Microphone input speech vector for H band
* - inMic : Microphone input speech vector for each band
* - num_bands : Number of bands in input vector
* - samples : Number of samples in input vector
*
* Return value:
@ -88,8 +89,8 @@ int WebRtcAgc_AddFarend(void* agcInst,
* : -1 - Error
*/
int WebRtcAgc_AddMic(void* agcInst,
int16_t* inMic,
int16_t* inMic_H,
int16_t* const* inMic,
int16_t num_bands,
int16_t samples);
/*
@ -97,12 +98,12 @@ int WebRtcAgc_AddMic(void* agcInst,
* It is a digital gain applied to the input signal and is used in the
* agcAdaptiveDigital mode where no microphone level is adjustable. The length
* of the input speech vector must be given in samples (80 when FS=8000, and 160
* when FS=16000 or FS=32000).
* when FS=16000, FS=32000 or FS=48000).
*
* Input:
* - agcInst : AGC instance.
* - inMic : Microphone input speech vector for L band
* - inMic_H : Microphone input speech vector for H band
* - inMic : Microphone input speech vector for each band
* - num_bands : Number of bands in input vector
* - samples : Number of samples in input vector
* - micLevelIn : Input level of microphone (static)
*
@ -116,8 +117,8 @@ int WebRtcAgc_AddMic(void* agcInst,
* : -1 - Error
*/
int WebRtcAgc_VirtualMic(void* agcInst,
int16_t* inMic,
int16_t* inMic_H,
int16_t* const* inMic,
int16_t num_bands,
int16_t samples,
int32_t micLevelIn,
int32_t* micLevelOut);
@ -126,16 +127,17 @@ int WebRtcAgc_VirtualMic(void* agcInst,
* This function processes a 10 ms frame and adjusts (normalizes) the gain both
* analog and digitally. The gain adjustments are done only during active
* periods of speech. The length of the speech vectors must be given in samples
* (80 when FS=8000, and 160 when FS=16000 or FS=32000). The echo parameter can
* be used to ensure the AGC will not adjust upward in the presence of echo.
* (80 when FS=8000, and 160 when FS=16000, FS=32000 or FS=48000). The echo
* parameter can be used to ensure the AGC will not adjust upward in the
* presence of echo.
*
* This function should be called after processing the near-end microphone
* signal, in any case after any echo cancellation.
*
* Input:
* - agcInst : AGC instance
* - inNear : Near-end input speech vector for L band
* - inNear_H : Near-end input speech vector for H band
* - inNear : Near-end input speech vector for each band
* - num_bands : Number of bands in input/output vector
* - samples : Number of samples in input/output vector
* - inMicLevel : Current microphone volume level
* - echo : Set to 0 if the signal passed to add_mic is
@ -145,9 +147,8 @@ int WebRtcAgc_VirtualMic(void* agcInst,
*
* Output:
* - outMicLevel : Adjusted microphone volume level
* - out : Gain-adjusted near-end speech vector (L band)
* - out : Gain-adjusted near-end speech vector
* : May be the same vector as the input.
* - out_H : Gain-adjusted near-end speech vector (H band)
* - saturationWarning : A returned value of 1 indicates a saturation event
* has occurred and the volume cannot be further
* reduced. Otherwise will be set to 0.
@ -157,11 +158,10 @@ int WebRtcAgc_VirtualMic(void* agcInst,
* : -1 - Error
*/
int WebRtcAgc_Process(void* agcInst,
const int16_t* inNear,
const int16_t* inNear_H,
const int16_t* const* inNear,
int16_t num_bands,
int16_t samples,
int16_t* out,
int16_t* out_H,
int16_t* const* out,
int32_t inMicLevel,
int32_t* outMicLevel,
int16_t echo,

View File

@ -90,8 +90,8 @@ int GainControlImpl::AnalyzeCaptureAudio(AudioBuffer* audio) {
Handle* my_handle = static_cast<Handle*>(handle(i));
err = WebRtcAgc_AddMic(
my_handle,
audio->split_bands(i)[kBand0To8kHz],
audio->split_bands(i)[kBand8To16kHz],
audio->split_bands(i),
audio->num_bands(),
static_cast<int16_t>(audio->samples_per_split_channel()));
if (err != apm_->kNoError) {
@ -106,8 +106,8 @@ int GainControlImpl::AnalyzeCaptureAudio(AudioBuffer* audio) {
err = WebRtcAgc_VirtualMic(
my_handle,
audio->split_bands(i)[kBand0To8kHz],
audio->split_bands(i)[kBand8To16kHz],
audio->split_bands(i),
audio->num_bands(),
static_cast<int16_t>(audio->samples_per_split_channel()),
analog_capture_level_,
&capture_level_out);
@ -144,11 +144,10 @@ int GainControlImpl::ProcessCaptureAudio(AudioBuffer* audio) {
int err = WebRtcAgc_Process(
my_handle,
audio->split_bands_const(i)[kBand0To8kHz],
audio->split_bands_const(i)[kBand8To16kHz],
audio->split_bands_const(i),
audio->num_bands(),
static_cast<int16_t>(audio->samples_per_split_channel()),
audio->split_bands(i)[kBand0To8kHz],
audio->split_bands(i)[kBand8To16kHz],
audio->split_bands(i),
capture_levels_[i],
&capture_level_out,
apm_->echo_cancellation()->stream_has_echo(),