lavfi: add aselect audio variant of select

2012-12-11 01:19:30 +01:00 · 2012-12-11 01:19:30 +01:00 · f5461face5
commit f5461face5
parent 64c5fbd7de
6 changed files with 108 additions and 26 deletions
--- a/1
+++ b/1
@ -41,6 +41,7 @@ version <next>:
 - VobSub demuxer
 - JSON captions for TED talks decoding support
 - SOX Resampler support in libswresample
+- aselect filter


 version 1.0:
--- a/doc/filters.texi
+++ b/doc/filters.texi
@ -3506,7 +3506,7 @@ scale='min(500\, iw*3/2):-1'
@end example
@end itemize

-@section select
+@section aselect, select
 Select frames to pass in output.

 It accepts in input an expression, which is evaluated for each input
@ -3551,7 +3551,7 @@ the PTS of the first video frame in the video, NAN if undefined
@item start_t
 the time of the first video frame in the video, NAN if undefined

-@item pict_type
+@item pict_type @emph{(video only)}
 the type of the filtered frame, can assume one of the following
 values:
@table @option
@ -3564,7 +3564,7 @@ values:
@item BI
@end table

-@item interlace_type
+@item interlace_type @emph{(video only)}
 the frame interlace type, can assume one of the following values:
@table @option
@item PROGRESSIVE
@ -3575,6 +3575,15 @@ the frame is top-field-first
 the frame is bottom-field-first
@end table

+@item consumed_samples_n @emph{(audio only)}
+the number of selected samples before the current frame
+
+@item samples_n @emph{(audio only)}
+the number of samples in the current frame
+
+@item sample_rate @emph{(audio only)}
+the input sample rate
+
@item key
 1 if the filtered frame is a key-frame, 0 otherwise

@ -3582,7 +3591,7 @@ the frame is bottom-field-first
 the position in the file of the filtered frame, -1 if the information
 is not available (e.g. for synthetic video)

-@item scene
+@item scene @emph{(video only)}
 value between 0 and 1 to indicate a new scene; a low value reflects a low
 probability for the current frame to introduce a new scene, while a higher
 value means the current frame is more likely to be one (see the example below)
@ -3617,6 +3626,9 @@ select='gte(t\,10)*lte(t\,20)*eq(pict_type\,I)'

 # select frames with a minimum distance of 10 seconds
 select='isnan(prev_selected_t)+gte(t-prev_selected_t\,10)'
+
+# use aselect to select only audio frames containing more than 100 samples
+aselect='gt(samples_n\,100)'
@end example

 Complete example to create a mosaic of the first scenes:
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@ -54,6 +54,7 @@ OBJS-$(CONFIG_AMERGE_FILTER)                 += af_amerge.o
 OBJS-$(CONFIG_AMIX_FILTER)                   += af_amix.o
 OBJS-$(CONFIG_ANULL_FILTER)                  += af_anull.o
 OBJS-$(CONFIG_ARESAMPLE_FILTER)              += af_aresample.o
+OBJS-$(CONFIG_ASELECT_FILTER)                += vf_select.o
 OBJS-$(CONFIG_ASENDCMD_FILTER)               += f_sendcmd.o
 OBJS-$(CONFIG_ASETNSAMPLES_FILTER)           += af_asetnsamples.o
 OBJS-$(CONFIG_ASETPTS_FILTER)                += f_setpts.o
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@ -46,6 +46,7 @@ void avfilter_register_all(void)
    REGISTER_FILTER (AMIX,        amix,        af);
    REGISTER_FILTER (ANULL,       anull,       af);
    REGISTER_FILTER (ARESAMPLE,   aresample,   af);
+    REGISTER_FILTER (ASELECT,     aselect,     af);
    REGISTER_FILTER (ASENDCMD,    asendcmd,    af);
    REGISTER_FILTER (ASETNSAMPLES, asetnsamples, af);
    REGISTER_FILTER (ASETPTS,     asetpts,     af);
--- a/libavfilter/version.h
+++ b/libavfilter/version.h
@ -29,8 +29,8 @@
 #include "libavutil/avutil.h"

 #define LIBAVFILTER_VERSION_MAJOR  3
-#define LIBAVFILTER_VERSION_MINOR  26
-#define LIBAVFILTER_VERSION_MICRO 102
+#define LIBAVFILTER_VERSION_MINOR  27
+#define LIBAVFILTER_VERSION_MICRO 100

 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
                                               LIBAVFILTER_VERSION_MINOR, \
--- a/libavfilter/vf_select.c
+++ b/libavfilter/vf_select.c
@ -27,6 +27,7 @@
 #include "libavutil/fifo.h"
 #include "libavutil/internal.h"
 #include "avfilter.h"
+#include "audio.h"
 #include "formats.h"
 #include "internal.h"
 #include "video.h"
@ -62,6 +63,10 @@ static const char *const var_names[] = {
    "TOPFIRST",
    "BOTTOMFIRST",

+    "consumed_samples_n",///< number of samples in the frames selected so far (only audio)
+    "samples_n",         ///< number of samples in the current frame (only audio)
+    "sample_rate",       ///< sample rate (only audio)
+
    "n",                 ///< frame number (starting from zero)
    "selected_n",        ///< selected frame number (starting from zero)
    "prev_selected_n",   ///< number of the last selected frame
@ -101,6 +106,10 @@ enum var_name {
    VAR_INTERLACE_TYPE_T,
    VAR_INTERLACE_TYPE_B,

+    VAR_CONSUMED_SAMPLES_N,
+    VAR_SAMPLES_N,
+    VAR_SAMPLE_RATE,
+
    VAR_N,
    VAR_SELECTED_N,
    VAR_PREV_SELECTED_N,
@ -174,6 +183,9 @@ static int config_input(AVFilterLink *inlink)
    select->var_values[VAR_INTERLACE_TYPE_T] = INTERLACE_TYPE_T;
    select->var_values[VAR_INTERLACE_TYPE_B] = INTERLACE_TYPE_B;

+    select->var_values[VAR_SAMPLE_RATE] =
+        inlink->type == AVMEDIA_TYPE_AUDIO ? inlink->sample_rate : NAN;
+
    if (CONFIG_AVCODEC && select->do_scene_detect) {
        select->avctx = avcodec_alloc_context3(NULL);
        if (!select->avctx)
@ -231,13 +243,6 @@ static int select_frame(AVFilterContext *ctx, AVFilterBufferRef *picref)
    AVFilterLink *inlink = ctx->inputs[0];
    double res;

-    if (CONFIG_AVCODEC && select->do_scene_detect) {
-        char buf[32];
-        select->var_values[VAR_SCENE] = get_scene_score(ctx, picref);
-        // TODO: document metadata
-        snprintf(buf, sizeof(buf), "%f", select->var_values[VAR_SCENE]);
-        av_dict_set(&picref->metadata, "lavfi.scene_score", buf, 0);
-    }
    if (isnan(select->var_values[VAR_START_PTS]))
        select->var_values[VAR_START_PTS] = TS2D(picref->pts);
    if (isnan(select->var_values[VAR_START_T]))
@ -248,34 +253,63 @@ static int select_frame(AVFilterContext *ctx, AVFilterBufferRef *picref)
    select->var_values[VAR_POS] = picref->pos == -1 ? NAN : picref->pos;
    select->var_values[VAR_PREV_PTS] = TS2D(picref ->pts);

-    select->var_values[VAR_INTERLACE_TYPE] =
-        !picref->video->interlaced     ? INTERLACE_TYPE_P :
+    switch (inlink->type) {
+    case AVMEDIA_TYPE_AUDIO:
+        select->var_values[VAR_SAMPLES_N] = picref->audio->nb_samples;
+        break;
+
+    case AVMEDIA_TYPE_VIDEO:
+        select->var_values[VAR_INTERLACE_TYPE] =
+            !picref->video->interlaced     ? INTERLACE_TYPE_P :
        picref->video->top_field_first ? INTERLACE_TYPE_T : INTERLACE_TYPE_B;
-    select->var_values[VAR_PICT_TYPE] = picref->video->pict_type;
+        select->var_values[VAR_PICT_TYPE] = picref->video->pict_type;
+        if (CONFIG_AVCODEC && select->do_scene_detect) {
+            char buf[32];
+            select->var_values[VAR_SCENE] = get_scene_score(ctx, picref);
+            // TODO: document metadata
+            snprintf(buf, sizeof(buf), "%f", select->var_values[VAR_SCENE]);
+            av_dict_set(&picref->metadata, "lavfi.scene_score", buf, 0);
+        }
+        break;
+    }

    res = av_expr_eval(select->expr, select->var_values, NULL);
    av_log(inlink->dst, AV_LOG_DEBUG,
-           "n:%d pts:%d t:%f pos:%d interlace_type:%c key:%d pict_type:%c "
-           "-> select:%f\n",
+           "n:%d pts:%d t:%f pos:%d key:%d",
           (int)select->var_values[VAR_N],
           (int)select->var_values[VAR_PTS],
           select->var_values[VAR_T],
           (int)select->var_values[VAR_POS],
-           select->var_values[VAR_INTERLACE_TYPE] == INTERLACE_TYPE_P ? 'P' :
-           select->var_values[VAR_INTERLACE_TYPE] == INTERLACE_TYPE_T ? 'T' :
-           select->var_values[VAR_INTERLACE_TYPE] == INTERLACE_TYPE_B ? 'B' : '?',
-           (int)select->var_values[VAR_KEY],
-           av_get_picture_type_char(select->var_values[VAR_PICT_TYPE]),
-           res);
+           (int)select->var_values[VAR_KEY]);

-    select->var_values[VAR_N] += 1.0;
+    switch (inlink->type) {
+    case AVMEDIA_TYPE_VIDEO:
+        av_log(inlink->dst, AV_LOG_DEBUG, " interlace_type:%c pict_type:%c",
+               select->var_values[VAR_INTERLACE_TYPE] == INTERLACE_TYPE_P ? 'P' :
+               select->var_values[VAR_INTERLACE_TYPE] == INTERLACE_TYPE_T ? 'T' :
+               select->var_values[VAR_INTERLACE_TYPE] == INTERLACE_TYPE_B ? 'B' : '?',
+               av_get_picture_type_char(select->var_values[VAR_PICT_TYPE]));
+        break;
+    case AVMEDIA_TYPE_AUDIO:
+        av_log(inlink->dst, AV_LOG_DEBUG, " samples_n:%d consumed_samples_n:%d",
+               (int)select->var_values[VAR_SAMPLES_N],
+               (int)select->var_values[VAR_CONSUMED_SAMPLES_N]);
+        break;
+    }
+
+    av_log(inlink->dst, AV_LOG_DEBUG, " -> select:%f\n", res);

    if (res) {
        select->var_values[VAR_PREV_SELECTED_N]   = select->var_values[VAR_N];
        select->var_values[VAR_PREV_SELECTED_PTS] = select->var_values[VAR_PTS];
        select->var_values[VAR_PREV_SELECTED_T]   = select->var_values[VAR_T];
        select->var_values[VAR_SELECTED_N] += 1.0;
+        if (inlink->type == AVMEDIA_TYPE_AUDIO)
+            select->var_values[VAR_CONSUMED_SAMPLES_N] += picref->audio->nb_samples;
    }
+
+    select->var_values[VAR_N] += 1.0;
+
    return res;
 }

@ -339,6 +373,38 @@ static int query_formats(AVFilterContext *ctx)
    return 0;
 }

+#if CONFIG_ASELECT_FILTER
+static const AVFilterPad avfilter_af_aselect_inputs[] = {
+    {
+        .name             = "default",
+        .type             = AVMEDIA_TYPE_AUDIO,
+        .get_audio_buffer = ff_null_get_audio_buffer,
+        .config_props     = config_input,
+        .filter_frame     = filter_frame,
+    },
+    { NULL }
+};
+
+static const AVFilterPad avfilter_af_aselect_outputs[] = {
+    {
+        .name = "default",
+        .type = AVMEDIA_TYPE_AUDIO,
+    },
+    { NULL }
+};
+
+AVFilter avfilter_af_aselect = {
+    .name      = "aselect",
+    .description = NULL_IF_CONFIG_SMALL("Select audio frames to pass in output."),
+    .init      = init,
+    .uninit    = uninit,
+    .priv_size = sizeof(SelectContext),
+    .inputs    = avfilter_af_aselect_inputs,
+    .outputs   = avfilter_af_aselect_outputs,
+};
+#endif /* CONFIG_ASELECT_FILTER */
+
+#if CONFIG_SELECT_FILTER
 static const AVFilterPad avfilter_vf_select_inputs[] = {
    {
        .name             = "default",
@ -362,7 +428,7 @@ static const AVFilterPad avfilter_vf_select_outputs[] = {

 AVFilter avfilter_vf_select = {
    .name      = "select",
-    .description = NULL_IF_CONFIG_SMALL("Select frames to pass in output."),
+    .description = NULL_IF_CONFIG_SMALL("Select video frames to pass in output."),
    .init      = init,
    .uninit    = uninit,
    .query_formats = query_formats,
@ -372,3 +438,4 @@ AVFilter avfilter_vf_select = {
    .inputs    = avfilter_vf_select_inputs,
    .outputs   = avfilter_vf_select_outputs,
 };
+#endif /* CONFIG_SELECT_FILTER */