vpx/vp9/common/vp9_reconinter.h

/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VP9_COMMON_VP9_RECONINTER_H_
#define VP9_COMMON_VP9_RECONINTER_H_

#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_onyxc_int.h"

struct subpix_fn_table;
// Declaration only; the definition is not part of this header.
// NOTE(review): judging by the name, this presumably builds the inter
// prediction for the luma (Y) plane of the block of size `bsize` located at
// (mi_row, mi_col) — confirm against the definition.
void vp9_build_inter_predictors_sby(MACROBLOCKD *xd, int mi_row, int mi_col,
                                    BLOCK_SIZE bsize);

// Declaration only; the definition is not part of this header.
// NOTE(review): judging by the name, this presumably builds the inter
// prediction for the chroma (U/V) planes of the block of size `bsize` located
// at (mi_row, mi_col) — confirm against the definition.
void vp9_build_inter_predictors_sbuv(MACROBLOCKD *xd, int mi_row, int mi_col,
                                     BLOCK_SIZE bsize);

// Declaration only; the definition is not part of this header.
// NOTE(review): judging by the name, this presumably builds the inter
// prediction for every plane of the block of size `bsize` located at
// (mi_row, mi_col) — confirm against the definition.
void vp9_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col,
                                   BLOCK_SIZE bsize);

// Declaration only; the definition is not part of this header.
// NOTE(review): presumably selects the interpolation filter `filter` for
// subsequent inter prediction on `xd`, using state from `cm` — confirm
// against the definition.
void vp9_setup_interp_filters(MACROBLOCKD *xd,
                              INTERPOLATIONFILTERTYPE filter,
                              VP9_COMMON *cm);

// Declaration only; the definition is not part of this header.
// NOTE(review): the parameter notes below are inferred from names and types
// alone — confirm against the definition.
//   src, src_stride - reference pixels to predict from and their row stride.
//   dst, dst_stride - output buffer for the prediction and its row stride.
//   mv_q3           - motion vector; the "q3" suffix suggests 3 fractional
//                     bits (1/8-pel units).
//   scale           - scale factors for a reference of different resolution.
//   w, h            - size of the predicted block.
//   do_avg          - non-zero presumably averages into dst instead of
//                     overwriting it (compound prediction).
//   subpix          - table of sub-pixel interpolation functions.
//   precision       - precision with which mv_q3 is to be interpreted.
void vp9_build_inter_predictor(const uint8_t *src, int src_stride,
                               uint8_t *dst, int dst_stride,
                               const MV *mv_q3,
                               const struct scale_factors *scale,
                               int w, int h, int do_avg,
                               const struct subpix_fn_table *subpix,
                               enum mv_precision precision);

// Returns the linear offset of position (x_offset, y_offset) within a buffer
// whose rows are `stride` elements apart, i.e. y * stride + x.
//
// If `scale` is non-NULL, each coordinate is first mapped through the
// scale_value_x / scale_value_y callback held in `scale`. If `scale` is NULL
// the coordinates are used as given.
static int scaled_buffer_offset(int x_offset, int y_offset, int stride,
                                const struct scale_factors *scale) {
  int col = x_offset;
  int row = y_offset;

  if (scale) {
    col = scale->scale_value_x(x_offset, scale);
    row = scale->scale_value_y(y_offset, scale);
  }

  return row * stride + col;
}

// Initializes `dst` so that it refers to the position of block
// (mi_row, mi_col) inside the plane starting at `src`.
//
// The block position is converted to plane coordinates by multiplying by
// MI_SIZE and shifting right by the plane's subsampling factors. The
// resulting coordinates are turned into a buffer offset by
// scaled_buffer_offset(), which applies `scale` when it is non-NULL.
// dst->stride is set to `stride` unchanged.
static void setup_pred_plane(struct buf_2d *dst,
                             uint8_t *src, int stride,
                             int mi_row, int mi_col,
                             const struct scale_factors *scale,
                             int subsampling_x, int subsampling_y) {
  const int plane_col = (MI_SIZE * mi_col) >> subsampling_x;
  const int plane_row = (MI_SIZE * mi_row) >> subsampling_y;
  const int offset = scaled_buffer_offset(plane_col, plane_row, stride, scale);

  dst->stride = stride;
  dst->buf = &src[offset];
}

// TODO(jkoleszar): audit all uses of this that don't set mb_row, mb_col
//
// Points the `dst` buffer of each of the first MAX_MB_PLANE planes of `xd` at
// block (mi_row, mi_col) of the frame buffer `src`. No scaling is applied
// (a NULL scale is passed to setup_pred_plane). The U and V planes both use
// src->uv_stride.
static void setup_dst_planes(MACROBLOCKD *xd,
                             const YV12_BUFFER_CONFIG *src,
                             int mi_row, int mi_col) {
  uint8_t *plane_bufs[4];
  int plane_strides[4];
  int plane;

  plane_bufs[0] = src->y_buffer;
  plane_bufs[1] = src->u_buffer;
  plane_bufs[2] = src->v_buffer;
  plane_bufs[3] = src->alpha_buffer;

  plane_strides[0] = src->y_stride;
  plane_strides[1] = src->uv_stride;
  plane_strides[2] = src->uv_stride;
  plane_strides[3] = src->alpha_stride;

  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
    struct macroblockd_plane *const pd = &xd->plane[plane];

    setup_pred_plane(&pd->dst, plane_bufs[plane], plane_strides[plane],
                     mi_row, mi_col, NULL,
                     pd->subsampling_x, pd->subsampling_y);
  }
}

// Points prediction-source slot `i` (pd->pre[i]) of each of the first
// MAX_MB_PLANE planes of `xd` at block (mi_row, mi_col) of the frame buffer
// `src`, with the offset computed through the scale factors `sf`.
//
// Does nothing when `src` is NULL. The U and V planes both use
// src->uv_stride.
static void setup_pre_planes(MACROBLOCKD *xd, int i,
                             const YV12_BUFFER_CONFIG *src,
                             int mi_row, int mi_col,
                             const struct scale_factors *sf) {
  uint8_t *plane_bufs[4];
  int plane_strides[4];
  int plane;

  if (!src) {
    return;
  }

  plane_bufs[0] = src->y_buffer;
  plane_bufs[1] = src->u_buffer;
  plane_bufs[2] = src->v_buffer;
  plane_bufs[3] = src->alpha_buffer;

  plane_strides[0] = src->y_stride;
  plane_strides[1] = src->uv_stride;
  plane_strides[2] = src->uv_stride;
  plane_strides[3] = src->alpha_stride;

  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
    struct macroblockd_plane *const pd = &xd->plane[plane];

    setup_pred_plane(&pd->pre[i], plane_bufs[plane], plane_strides[plane],
                     mi_row, mi_col, sf,
                     pd->subsampling_x, pd->subsampling_y);
  }
}

// Copies two entries of the table `sf` into xd->scale_factor[0] and
// xd->scale_factor[1]. `ref0` and `ref1` select the entries; a negative
// index falls back to entry 0.
static void set_scale_factors(MACROBLOCKD *xd, int ref0, int ref1,
                              struct scale_factors sf[MAX_REF_FRAMES]) {
  const int first_idx = ref0 < 0 ? 0 : ref0;
  const int second_idx = ref1 < 0 ? 0 : ref1;

  xd->scale_factor[0] = sf[first_idx];
  xd->scale_factor[1] = sf[second_idx];
}

// Declaration only; the definition is not part of this header.
// NOTE(review): presumably initializes the scale factors held in `cm` for
// the reference selected by index `i` — confirm against the definition.
void vp9_setup_scale_factors(VP9_COMMON *cm, int i);

#endif  // VP9_COMMON_VP9_RECONINTER_H_
Initial WebM release 2010-05-18 17:58:33 +02:00			`/*`
Use WebM in copyright notice for consistency Changes 'The VP8 project' to 'The WebM project', for consistency with other webmproject.org repositories. Fixes issue #97. Change-Id: I37c13ed5fbdb9d334ceef71c6350e9febed9bbba 2010-09-09 14:16:39 +02:00			`* Copyright (c) 2010 The WebM project authors. All Rights Reserved.`
Initial WebM release 2010-05-18 17:58:33 +02:00			`*`
cosmetics: trim trailing whitespace When the license headers were updated, they accidentally contained trailing whitespace, so unfortunately we have to touch all the files again. Change-Id: I236c05fade06589e417179c0444cb39b09e4200d 2010-06-18 18:39:21 +02:00			`* Use of this source code is governed by a BSD-style license`
LICENSE: update with latest text Change-Id: Ieebea089095d9073b3a94932791099f614ce120c 2010-06-04 22:19:40 +02:00			`* that can be found in the LICENSE file in the root of the source`
			`* tree. An additional intellectual property rights grant can be found`
cosmetics: trim trailing whitespace When the license headers were updated, they accidentally contained trailing whitespace, so unfortunately we have to touch all the files again. Change-Id: I236c05fade06589e417179c0444cb39b09e4200d 2010-06-18 18:39:21 +02:00			`* in the file PATENTS. All contributing project authors may`
LICENSE: update with latest text Change-Id: Ieebea089095d9073b3a94932791099f614ce120c 2010-06-04 22:19:40 +02:00			`* be found in the AUTHORS file in the root of the source tree.`
Initial WebM release 2010-05-18 17:58:33 +02:00			`*/`

google style guide include guards Change-Id: I2c252f3ddcc99e96c1f5d3dab8bcb25a2a3637ea 2012-11-30 01:36:10 +01:00			`#ifndef VP9_COMMON_VP9_RECONINTER_H_`
			`#define VP9_COMMON_VP9_RECONINTER_H_`
Initial WebM release 2010-05-18 17:58:33 +02:00
Use standard integer types for pixel values and coefficients. For coefficients, use int16_t (instead of short); for pixel values in 16-bit intermediates, use uint16_t (instead of unsigned short); for all others, use uint8_t (instead of unsigned char). Change-Id: I3619cd9abf106c3742eccc2e2f5e89a62774f7da 2012-12-19 00:31:19 +01:00			`#include "vpx/vpx_integer.h"`
fixed includes to be fully specified Change-Id: Ia1cce221f8511561b9cbd8edb7726fbc286ff243 2012-11-28 19:41:40 +01:00			`#include "vp9/common/vp9_onyxc_int.h"`
Adds support for switchable interpolation filters. Allows for switching/setting interpolation filters at the MB level. A frame level flag indicates whether to use a specific filter for the entire frame or to signal the interpolation filter for each MB. When switchable filters are used, the encoder chooses between 8-tap and 8-tap sharp filters. The code currently has options to explore other variations as well, which will be cleaned up subsequently. One issue with the framework is that encoding is slow. I tried to do some tricks to speed things up but it is still slow. Decoding speed should not be affected since the number of filter taps remains unchanged. With the current version, we are up 0.5% on derf on average but some videos city/mobile improve by close to 4 and 2% respectively. If we did a full-search by turning the SEARCH_BEST_FILTER flag on, the results are somewhat better. The framework can be combined with filtered prediction, and I seek feedback regarding that. Rebased. Change-Id: I8f632cb2c111e76284140a2bd480945d6d42b77a 2012-07-18 22:43:01 +02:00
Convert subpixel filters to use convolve framework Update the code to call the new convolution functions to do subpixel prediction rather than the existing functions. Remove the old C and assembly code, since it is unused. This causes a 50% performance reduction on the decoder, but that will be resolved when the asm for the new functions is available. There is no consensus for whether 6-tap or 2-tap predictors will be supported in the final codec, so these filters are implemented in terms of the 8-tap code, so that quality testing of these modes can continue. Implementing the lower complexity algorithms is a simple exercise, should it be necessary. This code produces slightly better results in the EIGHTTAP_SMOOTH case, since the filter is now applied in only one direction when the subpel motion is only in one direction. Like the previous code, the filtering is skipped entirely on full-pel MVs. This combination seems to give the best quality gains, but this may be indicative of a bug in the encoder's filter selection, since the encoder could achieve the result of skipping the filtering on full-pel by selecting one of the other filters. This should be revisited. Quality gains on derf positive on almost all clips. The only clip that seemed to be hurt at all datarates was football (-0.115% PSNR average, -0.587% min). Overall averages 0.375% PSNR, 0.347% SSIM. Change-Id: I7d469716091b1d89b4b08adde5863999319d69ff 2013-01-29 01:59:03 +01:00			`struct subpix_fn_table;`
Renaming BLOCK_SIZE_TYPE to BLOCK_SIZE in the common/decoder. Adding temporary "typedef BLOCK_SIZE BLOCK_SIZE_TYPE" which will go away after encoder's patch. Change-Id: I06ec6a6f079401439843ec981d1496234fd7775c 2013-08-26 20:33:16 +02:00			`void vp9_build_inter_predictors_sby(MACROBLOCKD *xd, int mi_row, int mi_col,`
			`BLOCK_SIZE bsize);`
Make the use of pred buffers consistent in MB/SB Use in-place buffers (dst of MACROBLOCKD) for macroblock prediction. This makes the macroblock buffer handling consistent with those of superblock. Remove predictor buffer MACROBLOCKD. Change-Id: Id1bcd898961097b1e6230c10f0130753a59fc6df 2013-04-15 18:31:27 +02:00
Renaming BLOCK_SIZE_TYPE to BLOCK_SIZE in the common/decoder. Adding temporary "typedef BLOCK_SIZE BLOCK_SIZE_TYPE" which will go away after encoder's patch. Change-Id: I06ec6a6f079401439843ec981d1496234fd7775c 2013-08-26 20:33:16 +02:00			`void vp9_build_inter_predictors_sbuv(MACROBLOCKD *xd, int mi_row, int mi_col,`
			`BLOCK_SIZE bsize);`
reconinter: remove unnecessary functions, params Removes the redundant dst pointers from vp9_build_inter_predictors_sb{y,uv} and the remaining mb specific functions. Change-Id: I7b6bf439d9394b85ea79b4fe61a3ffc1025720da 2013-04-19 19:45:50 +02:00
Renaming BLOCK_SIZE_TYPE to BLOCK_SIZE in the common/decoder. Adding temporary "typedef BLOCK_SIZE BLOCK_SIZE_TYPE" which will go away after encoder's patch. Change-Id: I06ec6a6f079401439843ec981d1496234fd7775c 2013-08-26 20:33:16 +02:00			`void vp9_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col,`
			`BLOCK_SIZE bsize);`
Fix some compiler warnings. Change-Id: Iccb56e3b966d1519ba49dfe4e1396479c6b54a6f 2012-08-21 02:45:36 +02:00
Removing redundant 'extern' keyword from function declarations. Change-Id: I893fa36297b9bd9cff93d082f1736f6860b15c0d 2013-02-27 00:52:05 +01:00			`void vp9_setup_interp_filters(MACROBLOCKD *xd,`
			`INTERPOLATIONFILTERTYPE filter,`
			`VP9_COMMON *cm);`
Initial WebM release 2010-05-18 17:58:33 +02:00
Refactor inter recon functions to support scaling Ensure that all inter prediction goes through a common code path that takes scaling into account. Removes a bunch of duplicate 1st/2nd predictor code. Also introduces a 16x8 mode for 8x8 MVs, similar to the 8x4 trick we were doing before. This has an unexpected effect with EIGHTTAP_SMOOTH, so it's disabled in that case for now. Change-Id: Ia053e823a8bc616a988a0af30452e1e75a739cba 2013-02-09 02:49:44 +01:00			`void vp9_build_inter_predictor(const uint8_t *src, int src_stride,`
			`uint8_t *dst, int dst_stride,`
Changing function arg type from int_mv* to MV*. Change-Id: Ic878d31df2ce783a2c9a8c4bc9ed301ec8ffe25e 2013-08-03 00:26:32 +02:00			`const MV *mv_q3,`
Refactor inter recon functions to support scaling Ensure that all inter prediction goes through a common code path that takes scaling into account. Removes a bunch of duplicate 1st/2nd predictor code. Also introduces a 16x8 mode for 8x8 MVs, similar to the 8x4 trick we were doing before. This has an unexpected effect with EIGHTTAP_SMOOTH, so it's disabled in that case for now. Change-Id: Ia053e823a8bc616a988a0af30452e1e75a739cba 2013-02-09 02:49:44 +01:00			`const struct scale_factors *scale,`
			`int w, int h, int do_avg,`
Transforming scale_mv_component_q4 into scale_mv_q4 function. Using MV instead of int_mv for function arguments. Change-Id: Ic25e13dccbc98fac1fa1b3255127e00cca2a57f6 2013-06-22 00:34:29 +02:00			`const struct subpix_fn_table *subpix,`
			`enum mv_precision precision);`
Removing redundant 'extern' keyword from function declarations. Change-Id: I893fa36297b9bd9cff93d082f1736f6860b15c0d 2013-02-27 00:52:05 +01:00
Eliminating several YV12_BUFFER_CONFIG usages. Change-Id: Ia85b987c935d545920dcae5a6f44136b1a08a008 2013-05-08 23:11:47 +02:00			`static int scaled_buffer_offset(int x_offset, int y_offset, int stride,`
Spatial resampling of ZEROMV predictors This patch allows coding frames using references of different resolution, in ZEROMV mode. For compound prediction, either reference may be scaled. To test, I use the resize_test and enable WRITE_RECON_BUFFER in vp9_onyxd_if.c. It's also useful to apply this patch to test/i420_video_source.h: --- a/test/i420_video_source.h +++ b/test/i420_video_source.h @@ -93,6 +93,7 @@ class I420VideoSource : public VideoSource { virtual void FillFrame() { // Read a frame from input_file. + if (frame_ != 3) if (fread(img_->img_data, raw_sz_, 1, input_file_) == 0) { limit_ = frame_; } This forces the frame that the resolution changes on to be coded with no motion, only scaling, and improves the quality of the result. Change-Id: I1ee75d19a437ff801192f767fd02a36bcbd1d496 2013-02-25 05:55:14 +01:00			`const struct scale_factors *scale) {`
Eliminating several YV12_BUFFER_CONFIG usages. Change-Id: Ia85b987c935d545920dcae5a6f44136b1a08a008 2013-05-08 23:11:47 +02:00			`const int x = scale ? scale->scale_value_x(x_offset, scale) : x_offset;`
			`const int y = scale ? scale->scale_value_y(y_offset, scale) : y_offset;`
			`return y * stride + x;`
Move dst to per-plane MACROBLOCKD data First in a series of commits moving the framebuffers pointers to per-plane data, so that they can be indexed numerically rather than by name. Change-Id: I6e0d60fd4d51e6375c384eb7321776564df21775 2013-04-20 00:52:17 +02:00			`}`

			`static void setup_pred_plane(struct buf_2d *dst,`
			`uint8_t *src, int stride,`
Grow MODE_INFO array to use an 8x8 basis. Change-Id: I087e08e7909a406b71715b8525c104208daa6889 2013-04-26 20:57:17 +02:00			`int mi_row, int mi_col,`
Move dst to per-plane MACROBLOCKD data First in a series of commits moving the framebuffers pointers to per-plane data, so that they can be indexed numerically rather than by name. Change-Id: I6e0d60fd4d51e6375c384eb7321776564df21775 2013-04-20 00:52:17 +02:00			`const struct scale_factors *scale,`
			`int subsampling_x, int subsampling_y) {`
Grow MODE_INFO array to use an 8x8 basis. Change-Id: I087e08e7909a406b71715b8525c104208daa6889 2013-04-26 20:57:17 +02:00			`const int x = (MI_SIZE * mi_col) >> subsampling_x;`
			`const int y = (MI_SIZE * mi_row) >> subsampling_y;`
Move dst to per-plane MACROBLOCKD data First in a series of commits moving the framebuffers pointers to per-plane data, so that they can be indexed numerically rather than by name. Change-Id: I6e0d60fd4d51e6375c384eb7321776564df21775 2013-04-20 00:52:17 +02:00			`dst->buf = src + scaled_buffer_offset(x, y, stride, scale);`
			`dst->stride = stride;`
			`}`

			`// TODO(jkoleszar): audit all uses of this that don't set mb_row, mb_col`
			`static void setup_dst_planes(MACROBLOCKD *xd,`
			`const YV12_BUFFER_CONFIG *src,`
Grow MODE_INFO array to use an 8x8 basis. Change-Id: I087e08e7909a406b71715b8525c104208daa6889 2013-04-26 20:57:17 +02:00			`int mi_row, int mi_col) {`
Initial version of alpha channel support This is a mostly-working implementation of an extra channel in the bitstream. Configure with --enable-alpha to test. Notable TODOs: - Add extra channel to all mismatch tests, PSNR, SSIM, etc - Configurable subsampling - Variable number of planes (currently always uses all 4) - Loop filtering - Per-plane lossless quantizer - ARNR support This implementation just uses the same contents as the Y channel for the A channel, due to lack of content and general pain in playing back 4 channel content. A later patch will use the actual alpha channel passed in from outside the codec. Change-Id: Ibf81f023b1c570bd84b3064e9b4b8ae52e087592 2013-05-16 02:55:08 +02:00			`uint8_t *buffers[4] = {src->y_buffer, src->u_buffer, src->v_buffer,`
			`src->alpha_buffer};`
			`int strides[4] = {src->y_stride, src->uv_stride, src->uv_stride,`
			`src->alpha_stride};`
Using loop to iterate through YV12_BUFFER_CONFIG planes. Change-Id: I22f1066eb0022c8d75f65a78435ee4ffecdfe0c9 2013-05-08 22:39:16 +02:00			`int i;`

			`for (i = 0; i < MAX_MB_PLANE; ++i) {`
			`struct macroblockd_plane *pd = &xd->plane[i];`
			`setup_pred_plane(&pd->dst, buffers[i], strides[i], mi_row, mi_col, NULL,`
			`pd->subsampling_x, pd->subsampling_y);`
			`}`
Spatial resampling of ZEROMV predictors This patch allows coding frames using references of different resolution, in ZEROMV mode. For compound prediction, either reference may be scaled. To test, I use the resize_test and enable WRITE_RECON_BUFFER in vp9_onyxd_if.c. It's also useful to apply this patch to test/i420_video_source.h: --- a/test/i420_video_source.h +++ b/test/i420_video_source.h @@ -93,6 +93,7 @@ class I420VideoSource : public VideoSource { virtual void FillFrame() { // Read a frame from input_file. + if (frame_ != 3) if (fread(img_->img_data, raw_sz_, 1, input_file_) == 0) { limit_ = frame_; } This forces the frame that the resolution changes on to be coded with no motion, only scaling, and improves the quality of the result. Change-Id: I1ee75d19a437ff801192f767fd02a36bcbd1d496 2013-02-25 05:55:14 +01:00			`}`

Refactoring setup_pre_planes function. Removing set_refs, adding set_ref function. Change-Id: I5635c478b106ae4e57d317f1c83d929644307e63 2013-07-04 02:42:01 +02:00			`static void setup_pre_planes(MACROBLOCKD *xd, int i,`
			`const YV12_BUFFER_CONFIG *src,`
Grow MODE_INFO array to use an 8x8 basis. Change-Id: I087e08e7909a406b71715b8525c104208daa6889 2013-04-26 20:57:17 +02:00			`int mi_row, int mi_col,`
Merge scale_factors and scale_factors_uv. This prevents a duplicate memcpy of a 128-byte struct every time set_scale_factors() is called (which is a lot), thus leading to a decrease from 3.7 MB to 1.85 MB of struct copying per 64x64 block RD/partition loop. Overall, this decreases encoding time of the first 50 frames of bus @ 1500kbps (speed 0) from 1min5.9 to 1min4.9, i.e. about a 1.5% overall speedup. We can likely get more gains by removing the copy of the other struct (and replacing it with an indexing) as well. Change-Id: I3dceb7e79f71e6fe911b11cc994cf89a869dde7a 2013-07-18 00:27:12 +02:00			`const struct scale_factors *sf) {`
Refactoring setup_pre_planes function. Removing set_refs, adding set_ref function. Change-Id: I5635c478b106ae4e57d317f1c83d929644307e63 2013-07-04 02:42:01 +02:00			`if (src) {`
			`int j;`
			`uint8_t* buffers[4] = {src->y_buffer, src->u_buffer, src->v_buffer,`
			`src->alpha_buffer};`
			`int strides[4] = {src->y_stride, src->uv_stride, src->uv_stride,`
			`src->alpha_stride};`

			`for (j = 0; j < MAX_MB_PLANE; ++j) {`
			`struct macroblockd_plane *pd = &xd->plane[j];`
			`setup_pred_plane(&pd->pre[i], buffers[j], strides[j],`
			`mi_row, mi_col, sf, pd->subsampling_x, pd->subsampling_y);`
Using loop to iterate through YV12_BUFFER_CONFIG planes. Change-Id: I22f1066eb0022c8d75f65a78435ee4ffecdfe0c9 2013-05-08 22:39:16 +02:00			`}`
Move pre, second_pre to per-plane MACROBLOCKD data Continue moving framebuffers to per-plane data. Change-Id: I237e5a998b364c4ec20316e7249206c0bff8631a 2013-04-20 04:16:14 +02:00			`}`
			`}`

Decoder's code cleanup. Using vp9_set_pred_flag function instead of custom code, adding decode_tokens function which is now called from decode_atom, decode_sb_intra, and decode_sb. Change-Id: Ie163a7106c0241099da9c5fe03069bd71f9d9ff8 2013-06-28 01:15:43 +02:00			`static void set_scale_factors(MACROBLOCKD *xd, int ref0, int ref1,`
			`struct scale_factors sf[MAX_REF_FRAMES]) {`
Merge scale_factors and scale_factors_uv. This prevents a duplicate memcpy of a 128-byte struct every time set_scale_factors() is called (which is a lot), thus leading to a decrease from 3.7 MB to 1.85 MB of struct copying per 64x64 block RD/partition loop. Overall, this decreases encoding time of the first 50 frames of bus @ 1500kbps (speed 0) from 1min5.9 to 1min4.9, i.e. about a 1.5% overall speedup. We can likely get more gains by removing the copy of the other struct (and replacing it with an indexing) as well. Change-Id: I3dceb7e79f71e6fe911b11cc994cf89a869dde7a 2013-07-18 00:27:12 +02:00			`xd->scale_factor[0] = sf[ref0 >= 0 ? ref0 : 0];`
			`xd->scale_factor[1] = sf[ref1 >= 0 ? ref1 : 0];`
Spatial resampling of ZEROMV predictors This patch allows coding frames using references of different resolution, in ZEROMV mode. For compound prediction, either reference may be scaled. To test, I use the resize_test and enable WRITE_RECON_BUFFER in vp9_onyxd_if.c. It's also useful to apply this patch to test/i420_video_source.h: --- a/test/i420_video_source.h +++ b/test/i420_video_source.h @@ -93,6 +93,7 @@ class I420VideoSource : public VideoSource { virtual void FillFrame() { // Read a frame from input_file. + if (frame_ != 3) if (fread(img_->img_data, raw_sz_, 1, input_file_) == 0) { limit_ = frame_; } This forces the frame that the resolution changes on to be coded with no motion, only scaling, and improves the quality of the result. Change-Id: I1ee75d19a437ff801192f767fd02a36bcbd1d496 2013-02-25 05:55:14 +01:00			`}`

Moving the same code to new function vp9_setup_scale_factors. Change-Id: I2408ad22717784a40e23701ccb9d978265440e4f 2013-05-15 02:10:17 +02:00			`void vp9_setup_scale_factors(VP9_COMMON *cm, int i);`

Use standard integer types for pixel values and coefficients. For coefficients, use int16_t (instead of short); for pixel values in 16-bit intermediates, use uint16_t (instead of unsigned short); for all others, use uint8_t (instead of unsigned char). Change-Id: I3619cd9abf106c3742eccc2e2f5e89a62774f7da 2012-12-19 00:31:19 +01:00			`#endif // VP9_COMMON_VP9_RECONINTER_H_`