avfilter/af_silenceremove: add peak detector
Signed-off-by: Paul B Mahol <onemda@gmail.com>
This commit is contained in:
parent
3215342121
commit
1809894b2a
@ -2969,6 +2969,10 @@ at the beginning of each period of silence.
|
|||||||
For example, if you want to remove long pauses between words but do not want
|
For example, if you want to remove long pauses between words but do not want
|
||||||
to remove the pauses completely. Default value is @code{0}.
|
to remove the pauses completely. Default value is @code{0}.
|
||||||
|
|
||||||
|
@item detection
|
||||||
|
Set how is silence detected. Can be @code{rms} or @code{peak}. Second is faster
|
||||||
|
and works better with digital silence which is exactly 0.
|
||||||
|
Default value is @code{rms}.
|
||||||
@end table
|
@end table
|
||||||
|
|
||||||
@subsection Examples
|
@subsection Examples
|
||||||
|
@ -65,11 +65,15 @@ typedef struct SilenceRemoveContext {
|
|||||||
double *window_current;
|
double *window_current;
|
||||||
double *window_end;
|
double *window_end;
|
||||||
int window_size;
|
int window_size;
|
||||||
double rms_sum;
|
double sum;
|
||||||
|
|
||||||
int leave_silence;
|
int leave_silence;
|
||||||
int restart;
|
int restart;
|
||||||
int64_t next_pts;
|
int64_t next_pts;
|
||||||
|
|
||||||
|
int detection;
|
||||||
|
void (*update)(struct SilenceRemoveContext *s, double sample);
|
||||||
|
double(*compute)(struct SilenceRemoveContext *s, double sample);
|
||||||
} SilenceRemoveContext;
|
} SilenceRemoveContext;
|
||||||
|
|
||||||
#define OFFSET(x) offsetof(SilenceRemoveContext, x)
|
#define OFFSET(x) offsetof(SilenceRemoveContext, x)
|
||||||
@ -82,11 +86,58 @@ static const AVOption silenceremove_options[] = {
|
|||||||
{ "stop_duration", NULL, OFFSET(stop_duration), AV_OPT_TYPE_DURATION, {.i64=0}, 0, 9000, FLAGS },
|
{ "stop_duration", NULL, OFFSET(stop_duration), AV_OPT_TYPE_DURATION, {.i64=0}, 0, 9000, FLAGS },
|
||||||
{ "stop_threshold", NULL, OFFSET(stop_threshold), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0, DBL_MAX, FLAGS },
|
{ "stop_threshold", NULL, OFFSET(stop_threshold), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0, DBL_MAX, FLAGS },
|
||||||
{ "leave_silence", NULL, OFFSET(leave_silence), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
|
{ "leave_silence", NULL, OFFSET(leave_silence), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
|
||||||
|
{ "detection", NULL, OFFSET(detection), AV_OPT_TYPE_INT, {.i64=1}, 0, 1, FLAGS, "detection" },
|
||||||
|
{ "peak", 0, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "detection" },
|
||||||
|
{ "rms", 0, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "detection" },
|
||||||
{ NULL }
|
{ NULL }
|
||||||
};
|
};
|
||||||
|
|
||||||
AVFILTER_DEFINE_CLASS(silenceremove);
|
AVFILTER_DEFINE_CLASS(silenceremove);
|
||||||
|
|
||||||
|
static double compute_peak(SilenceRemoveContext *s, double sample)
|
||||||
|
{
|
||||||
|
double new_sum;
|
||||||
|
|
||||||
|
new_sum = s->sum;
|
||||||
|
new_sum -= *s->window_current;
|
||||||
|
new_sum += fabs(sample);
|
||||||
|
|
||||||
|
return new_sum / s->window_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void update_peak(SilenceRemoveContext *s, double sample)
|
||||||
|
{
|
||||||
|
s->sum -= *s->window_current;
|
||||||
|
*s->window_current = fabs(sample);
|
||||||
|
s->sum += *s->window_current;
|
||||||
|
|
||||||
|
s->window_current++;
|
||||||
|
if (s->window_current >= s->window_end)
|
||||||
|
s->window_current = s->window;
|
||||||
|
}
|
||||||
|
|
||||||
|
static double compute_rms(SilenceRemoveContext *s, double sample)
|
||||||
|
{
|
||||||
|
double new_sum;
|
||||||
|
|
||||||
|
new_sum = s->sum;
|
||||||
|
new_sum -= *s->window_current;
|
||||||
|
new_sum += sample * sample;
|
||||||
|
|
||||||
|
return sqrt(new_sum / s->window_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void update_rms(SilenceRemoveContext *s, double sample)
|
||||||
|
{
|
||||||
|
s->sum -= *s->window_current;
|
||||||
|
*s->window_current = sample * sample;
|
||||||
|
s->sum += *s->window_current;
|
||||||
|
|
||||||
|
s->window_current++;
|
||||||
|
if (s->window_current >= s->window_end)
|
||||||
|
s->window_current = s->window;
|
||||||
|
}
|
||||||
|
|
||||||
static av_cold int init(AVFilterContext *ctx)
|
static av_cold int init(AVFilterContext *ctx)
|
||||||
{
|
{
|
||||||
SilenceRemoveContext *s = ctx->priv;
|
SilenceRemoveContext *s = ctx->priv;
|
||||||
@ -96,16 +147,27 @@ static av_cold int init(AVFilterContext *ctx)
|
|||||||
s->restart = 1;
|
s->restart = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
switch (s->detection) {
|
||||||
|
case 0:
|
||||||
|
s->update = update_peak;
|
||||||
|
s->compute = compute_peak;
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
s->update = update_rms;
|
||||||
|
s->compute = compute_rms;
|
||||||
|
break;
|
||||||
|
};
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void clear_rms(SilenceRemoveContext *s)
|
static void clear_window(SilenceRemoveContext *s)
|
||||||
{
|
{
|
||||||
memset(s->window, 0, s->window_size * sizeof(*s->window));
|
memset(s->window, 0, s->window_size * sizeof(*s->window));
|
||||||
|
|
||||||
s->window_current = s->window;
|
s->window_current = s->window;
|
||||||
s->window_end = s->window + s->window_size;
|
s->window_end = s->window + s->window_size;
|
||||||
s->rms_sum = 0;
|
s->sum = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int config_input(AVFilterLink *inlink)
|
static int config_input(AVFilterLink *inlink)
|
||||||
@ -118,7 +180,7 @@ static int config_input(AVFilterLink *inlink)
|
|||||||
if (!s->window)
|
if (!s->window)
|
||||||
return AVERROR(ENOMEM);
|
return AVERROR(ENOMEM);
|
||||||
|
|
||||||
clear_rms(s);
|
clear_window(s);
|
||||||
|
|
||||||
s->start_duration = av_rescale(s->start_duration, inlink->sample_rate,
|
s->start_duration = av_rescale(s->start_duration, inlink->sample_rate,
|
||||||
AV_TIME_BASE);
|
AV_TIME_BASE);
|
||||||
@ -153,28 +215,6 @@ static int config_input(AVFilterLink *inlink)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static double compute_rms(SilenceRemoveContext *s, double sample)
|
|
||||||
{
|
|
||||||
double new_sum;
|
|
||||||
|
|
||||||
new_sum = s->rms_sum;
|
|
||||||
new_sum -= *s->window_current;
|
|
||||||
new_sum += sample * sample;
|
|
||||||
|
|
||||||
return sqrt(new_sum / s->window_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void update_rms(SilenceRemoveContext *s, double sample)
|
|
||||||
{
|
|
||||||
s->rms_sum -= *s->window_current;
|
|
||||||
*s->window_current = sample * sample;
|
|
||||||
s->rms_sum += *s->window_current;
|
|
||||||
|
|
||||||
s->window_current++;
|
|
||||||
if (s->window_current >= s->window_end)
|
|
||||||
s->window_current = s->window;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void flush(AVFrame *out, AVFilterLink *outlink,
|
static void flush(AVFrame *out, AVFilterLink *outlink,
|
||||||
int *nb_samples_written, int *ret)
|
int *nb_samples_written, int *ret)
|
||||||
{
|
{
|
||||||
@ -209,12 +249,12 @@ silence_trim:
|
|||||||
for (i = 0; i < nbs; i++) {
|
for (i = 0; i < nbs; i++) {
|
||||||
threshold = 0;
|
threshold = 0;
|
||||||
for (j = 0; j < inlink->channels; j++) {
|
for (j = 0; j < inlink->channels; j++) {
|
||||||
threshold |= compute_rms(s, ibuf[j]) > s->start_threshold;
|
threshold |= s->compute(s, ibuf[j]) > s->start_threshold;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (threshold) {
|
if (threshold) {
|
||||||
for (j = 0; j < inlink->channels; j++) {
|
for (j = 0; j < inlink->channels; j++) {
|
||||||
update_rms(s, *ibuf);
|
s->update(s, *ibuf);
|
||||||
s->start_holdoff[s->start_holdoff_end++] = *ibuf++;
|
s->start_holdoff[s->start_holdoff_end++] = *ibuf++;
|
||||||
nb_samples_read++;
|
nb_samples_read++;
|
||||||
}
|
}
|
||||||
@ -232,7 +272,7 @@ silence_trim:
|
|||||||
s->start_holdoff_end = 0;
|
s->start_holdoff_end = 0;
|
||||||
|
|
||||||
for (j = 0; j < inlink->channels; j++)
|
for (j = 0; j < inlink->channels; j++)
|
||||||
update_rms(s, ibuf[j]);
|
s->update(s, ibuf[j]);
|
||||||
|
|
||||||
ibuf += inlink->channels;
|
ibuf += inlink->channels;
|
||||||
nb_samples_read += inlink->channels;
|
nb_samples_read += inlink->channels;
|
||||||
@ -284,7 +324,7 @@ silence_copy:
|
|||||||
for (i = 0; i < nbs; i++) {
|
for (i = 0; i < nbs; i++) {
|
||||||
threshold = 1;
|
threshold = 1;
|
||||||
for (j = 0; j < inlink->channels; j++)
|
for (j = 0; j < inlink->channels; j++)
|
||||||
threshold &= compute_rms(s, ibuf[j]) > s->stop_threshold;
|
threshold &= s->compute(s, ibuf[j]) > s->stop_threshold;
|
||||||
|
|
||||||
if (threshold && s->stop_holdoff_end && !s->leave_silence) {
|
if (threshold && s->stop_holdoff_end && !s->leave_silence) {
|
||||||
s->mode = SILENCE_COPY_FLUSH;
|
s->mode = SILENCE_COPY_FLUSH;
|
||||||
@ -292,14 +332,14 @@ silence_copy:
|
|||||||
goto silence_copy_flush;
|
goto silence_copy_flush;
|
||||||
} else if (threshold) {
|
} else if (threshold) {
|
||||||
for (j = 0; j < inlink->channels; j++) {
|
for (j = 0; j < inlink->channels; j++) {
|
||||||
update_rms(s, *ibuf);
|
s->update(s, *ibuf);
|
||||||
*obuf++ = *ibuf++;
|
*obuf++ = *ibuf++;
|
||||||
nb_samples_read++;
|
nb_samples_read++;
|
||||||
nb_samples_written++;
|
nb_samples_written++;
|
||||||
}
|
}
|
||||||
} else if (!threshold) {
|
} else if (!threshold) {
|
||||||
for (j = 0; j < inlink->channels; j++) {
|
for (j = 0; j < inlink->channels; j++) {
|
||||||
update_rms(s, *ibuf);
|
s->update(s, *ibuf);
|
||||||
if (s->leave_silence) {
|
if (s->leave_silence) {
|
||||||
*obuf++ = *ibuf;
|
*obuf++ = *ibuf;
|
||||||
nb_samples_written++;
|
nb_samples_written++;
|
||||||
@ -323,7 +363,7 @@ silence_copy:
|
|||||||
s->start_found_periods = 0;
|
s->start_found_periods = 0;
|
||||||
s->start_holdoff_offset = 0;
|
s->start_holdoff_offset = 0;
|
||||||
s->start_holdoff_end = 0;
|
s->start_holdoff_end = 0;
|
||||||
clear_rms(s);
|
clear_window(s);
|
||||||
s->mode = SILENCE_TRIM;
|
s->mode = SILENCE_TRIM;
|
||||||
flush(out, outlink, &nb_samples_written, &ret);
|
flush(out, outlink, &nb_samples_written, &ret);
|
||||||
goto silence_trim;
|
goto silence_trim;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user