avfilter/af_silenceremove: add peak detector
Signed-off-by: Paul B Mahol <onemda@gmail.com>
This commit is contained in:
parent
3215342121
commit
1809894b2a
@ -2969,6 +2969,10 @@ at the beginning of each period of silence.
|
||||
For example, if you want to remove long pauses between words but do not want
|
||||
to remove the pauses completely. Default value is @code{0}.
|
||||
|
||||
@item detection
|
||||
Set how is silence detected. Can be @code{rms} or @code{peak}. Second is faster
|
||||
and works better with digital silence which is exactly 0.
|
||||
Default value is @code{rms}.
|
||||
@end table
|
||||
|
||||
@subsection Examples
|
||||
|
@ -65,11 +65,15 @@ typedef struct SilenceRemoveContext {
|
||||
double *window_current;
|
||||
double *window_end;
|
||||
int window_size;
|
||||
double rms_sum;
|
||||
double sum;
|
||||
|
||||
int leave_silence;
|
||||
int restart;
|
||||
int64_t next_pts;
|
||||
|
||||
int detection;
|
||||
void (*update)(struct SilenceRemoveContext *s, double sample);
|
||||
double(*compute)(struct SilenceRemoveContext *s, double sample);
|
||||
} SilenceRemoveContext;
|
||||
|
||||
#define OFFSET(x) offsetof(SilenceRemoveContext, x)
|
||||
@ -82,11 +86,58 @@ static const AVOption silenceremove_options[] = {
|
||||
{ "stop_duration", NULL, OFFSET(stop_duration), AV_OPT_TYPE_DURATION, {.i64=0}, 0, 9000, FLAGS },
|
||||
{ "stop_threshold", NULL, OFFSET(stop_threshold), AV_OPT_TYPE_DOUBLE, {.dbl=0}, 0, DBL_MAX, FLAGS },
|
||||
{ "leave_silence", NULL, OFFSET(leave_silence), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
|
||||
{ "detection", NULL, OFFSET(detection), AV_OPT_TYPE_INT, {.i64=1}, 0, 1, FLAGS, "detection" },
|
||||
{ "peak", 0, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "detection" },
|
||||
{ "rms", 0, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "detection" },
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
AVFILTER_DEFINE_CLASS(silenceremove);
|
||||
|
||||
static double compute_peak(SilenceRemoveContext *s, double sample)
|
||||
{
|
||||
double new_sum;
|
||||
|
||||
new_sum = s->sum;
|
||||
new_sum -= *s->window_current;
|
||||
new_sum += fabs(sample);
|
||||
|
||||
return new_sum / s->window_size;
|
||||
}
|
||||
|
||||
static void update_peak(SilenceRemoveContext *s, double sample)
|
||||
{
|
||||
s->sum -= *s->window_current;
|
||||
*s->window_current = fabs(sample);
|
||||
s->sum += *s->window_current;
|
||||
|
||||
s->window_current++;
|
||||
if (s->window_current >= s->window_end)
|
||||
s->window_current = s->window;
|
||||
}
|
||||
|
||||
static double compute_rms(SilenceRemoveContext *s, double sample)
|
||||
{
|
||||
double new_sum;
|
||||
|
||||
new_sum = s->sum;
|
||||
new_sum -= *s->window_current;
|
||||
new_sum += sample * sample;
|
||||
|
||||
return sqrt(new_sum / s->window_size);
|
||||
}
|
||||
|
||||
static void update_rms(SilenceRemoveContext *s, double sample)
|
||||
{
|
||||
s->sum -= *s->window_current;
|
||||
*s->window_current = sample * sample;
|
||||
s->sum += *s->window_current;
|
||||
|
||||
s->window_current++;
|
||||
if (s->window_current >= s->window_end)
|
||||
s->window_current = s->window;
|
||||
}
|
||||
|
||||
static av_cold int init(AVFilterContext *ctx)
|
||||
{
|
||||
SilenceRemoveContext *s = ctx->priv;
|
||||
@ -96,16 +147,27 @@ static av_cold int init(AVFilterContext *ctx)
|
||||
s->restart = 1;
|
||||
}
|
||||
|
||||
switch (s->detection) {
|
||||
case 0:
|
||||
s->update = update_peak;
|
||||
s->compute = compute_peak;
|
||||
break;
|
||||
case 1:
|
||||
s->update = update_rms;
|
||||
s->compute = compute_rms;
|
||||
break;
|
||||
};
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void clear_rms(SilenceRemoveContext *s)
|
||||
static void clear_window(SilenceRemoveContext *s)
|
||||
{
|
||||
memset(s->window, 0, s->window_size * sizeof(*s->window));
|
||||
|
||||
s->window_current = s->window;
|
||||
s->window_end = s->window + s->window_size;
|
||||
s->rms_sum = 0;
|
||||
s->sum = 0;
|
||||
}
|
||||
|
||||
static int config_input(AVFilterLink *inlink)
|
||||
@ -118,7 +180,7 @@ static int config_input(AVFilterLink *inlink)
|
||||
if (!s->window)
|
||||
return AVERROR(ENOMEM);
|
||||
|
||||
clear_rms(s);
|
||||
clear_window(s);
|
||||
|
||||
s->start_duration = av_rescale(s->start_duration, inlink->sample_rate,
|
||||
AV_TIME_BASE);
|
||||
@ -153,28 +215,6 @@ static int config_input(AVFilterLink *inlink)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static double compute_rms(SilenceRemoveContext *s, double sample)
|
||||
{
|
||||
double new_sum;
|
||||
|
||||
new_sum = s->rms_sum;
|
||||
new_sum -= *s->window_current;
|
||||
new_sum += sample * sample;
|
||||
|
||||
return sqrt(new_sum / s->window_size);
|
||||
}
|
||||
|
||||
static void update_rms(SilenceRemoveContext *s, double sample)
|
||||
{
|
||||
s->rms_sum -= *s->window_current;
|
||||
*s->window_current = sample * sample;
|
||||
s->rms_sum += *s->window_current;
|
||||
|
||||
s->window_current++;
|
||||
if (s->window_current >= s->window_end)
|
||||
s->window_current = s->window;
|
||||
}
|
||||
|
||||
static void flush(AVFrame *out, AVFilterLink *outlink,
|
||||
int *nb_samples_written, int *ret)
|
||||
{
|
||||
@ -209,12 +249,12 @@ silence_trim:
|
||||
for (i = 0; i < nbs; i++) {
|
||||
threshold = 0;
|
||||
for (j = 0; j < inlink->channels; j++) {
|
||||
threshold |= compute_rms(s, ibuf[j]) > s->start_threshold;
|
||||
threshold |= s->compute(s, ibuf[j]) > s->start_threshold;
|
||||
}
|
||||
|
||||
if (threshold) {
|
||||
for (j = 0; j < inlink->channels; j++) {
|
||||
update_rms(s, *ibuf);
|
||||
s->update(s, *ibuf);
|
||||
s->start_holdoff[s->start_holdoff_end++] = *ibuf++;
|
||||
nb_samples_read++;
|
||||
}
|
||||
@ -232,7 +272,7 @@ silence_trim:
|
||||
s->start_holdoff_end = 0;
|
||||
|
||||
for (j = 0; j < inlink->channels; j++)
|
||||
update_rms(s, ibuf[j]);
|
||||
s->update(s, ibuf[j]);
|
||||
|
||||
ibuf += inlink->channels;
|
||||
nb_samples_read += inlink->channels;
|
||||
@ -284,7 +324,7 @@ silence_copy:
|
||||
for (i = 0; i < nbs; i++) {
|
||||
threshold = 1;
|
||||
for (j = 0; j < inlink->channels; j++)
|
||||
threshold &= compute_rms(s, ibuf[j]) > s->stop_threshold;
|
||||
threshold &= s->compute(s, ibuf[j]) > s->stop_threshold;
|
||||
|
||||
if (threshold && s->stop_holdoff_end && !s->leave_silence) {
|
||||
s->mode = SILENCE_COPY_FLUSH;
|
||||
@ -292,14 +332,14 @@ silence_copy:
|
||||
goto silence_copy_flush;
|
||||
} else if (threshold) {
|
||||
for (j = 0; j < inlink->channels; j++) {
|
||||
update_rms(s, *ibuf);
|
||||
s->update(s, *ibuf);
|
||||
*obuf++ = *ibuf++;
|
||||
nb_samples_read++;
|
||||
nb_samples_written++;
|
||||
}
|
||||
} else if (!threshold) {
|
||||
for (j = 0; j < inlink->channels; j++) {
|
||||
update_rms(s, *ibuf);
|
||||
s->update(s, *ibuf);
|
||||
if (s->leave_silence) {
|
||||
*obuf++ = *ibuf;
|
||||
nb_samples_written++;
|
||||
@ -323,7 +363,7 @@ silence_copy:
|
||||
s->start_found_periods = 0;
|
||||
s->start_holdoff_offset = 0;
|
||||
s->start_holdoff_end = 0;
|
||||
clear_rms(s);
|
||||
clear_window(s);
|
||||
s->mode = SILENCE_TRIM;
|
||||
flush(out, outlink, &nb_samples_written, &ret);
|
||||
goto silence_trim;
|
||||
|
Loading…
x
Reference in New Issue
Block a user