2010-05-18 17:58:33 +02:00
|
|
|
/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
|
|
|
|
|
|
|
|
|
2012-11-28 19:41:40 +01:00
|
|
|
#include "vp9/encoder/vp9_variance.h"
|
2012-11-27 22:59:17 +01:00
|
|
|
#include "vp9/common/vp9_filter.h"
|
|
|
|
#include "vp9/common/vp9_subpelvar.h"
|
2012-11-15 00:02:43 +01:00
|
|
|
#include "vpx/vpx_integer.h"
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-12-19 00:31:19 +01:00
|
|
|
/*
 * Sum of squares of the first 256 int16_t values at src_ptr.
 *
 * Each product is formed in int (after integer promotion) and added to an
 * unsigned accumulator, so the total wraps modulo UINT_MAX + 1 rather than
 * overflowing.
 *
 * src_ptr: must point to at least 256 readable elements.
 * Returns the accumulated sum of squares.
 */
unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) {
  unsigned int i, sum = 0;

  for (i = 0; i < 256; i++) {
    sum += (src_ptr[i] * src_ptr[i]);
  }

  return sum;
}
|
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
/*
 * Variance metric for a 64x64 block.
 *
 * Runs variance() (defined outside this chunk) with block dimensions 64 and
 * 64, stores its first output in *sse, and returns that value minus the
 * square of its second output divided by 4096.
 * NOTE(review): the two outputs are presumably the sum of squared
 * differences and the sum of differences -- confirm against variance().
 */
unsigned int vp9_variance64x64_c(const uint8_t *src_ptr,
                                 int source_stride,
                                 const uint8_t *ref_ptr,
                                 int recon_stride,
                                 unsigned int *sse) {
  unsigned int var;
  int avg;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 64, &var, &avg);
  *sse = var;
  /* The square is taken in 64 bits; >> 12 divides by 64 * 64. */
  return (var - (((int64_t)avg * avg) >> 12));
}
|
|
|
|
|
2012-12-19 00:31:19 +01:00
|
|
|
/*
 * Variance metric for a 32x32 block.
 *
 * Runs variance() (defined outside this chunk) with block dimensions 32 and
 * 32, stores its first output in *sse, and returns that value minus the
 * square of its second output divided by 1024.
 * NOTE(review): the two outputs are presumably the sum of squared
 * differences and the sum of differences -- confirm against variance().
 */
unsigned int vp9_variance32x32_c(const uint8_t *src_ptr,
                                 int source_stride,
                                 const uint8_t *ref_ptr,
                                 int recon_stride,
                                 unsigned int *sse) {
  unsigned int var;
  int avg;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 32, &var, &avg);
  *sse = var;
  /* The square is taken in 64 bits; >> 10 divides by 32 * 32. */
  return (var - (((int64_t)avg * avg) >> 10));
}
|
2010-05-18 17:58:33 +02:00
|
|
|
|
2012-12-19 00:31:19 +01:00
|
|
|
/*
 * Variance metric for a 16x16 block.
 *
 * Runs variance() (defined outside this chunk) with block dimensions 16 and
 * 16, stores its first output in *sse, and returns that value minus the
 * square of its second output divided by 256.
 * NOTE(review): the two outputs are presumably the sum of squared
 * differences and the sum of differences -- confirm against variance().
 */
unsigned int vp9_variance16x16_c(const uint8_t *src_ptr,
                                 int source_stride,
                                 const uint8_t *ref_ptr,
                                 int recon_stride,
                                 unsigned int *sse) {
  unsigned int var;
  int avg;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
  *sse = var;
  /* The square is taken in unsigned arithmetic; >> 8 divides by 16 * 16. */
  return (var - (((unsigned int)avg * avg) >> 8));
}
|
|
|
|
|
2012-12-19 00:31:19 +01:00
|
|
|
/*
 * Variance metric for an 8x16 block.
 *
 * Runs variance() (defined outside this chunk) with block dimensions 8 and
 * 16, stores its first output in *sse, and returns that value minus the
 * square of its second output divided by 128.
 * NOTE(review): the two outputs are presumably the sum of squared
 * differences and the sum of differences -- confirm against variance().
 */
unsigned int vp9_variance8x16_c(const uint8_t *src_ptr,
                                int source_stride,
                                const uint8_t *ref_ptr,
                                int recon_stride,
                                unsigned int *sse) {
  unsigned int var;
  int avg;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16, &var, &avg);
  *sse = var;
  /* The square is taken in unsigned arithmetic; >> 7 divides by 8 * 16. */
  return (var - (((unsigned int)avg * avg) >> 7));
}
|
|
|
|
|
2012-12-19 00:31:19 +01:00
|
|
|
/*
 * Variance metric for a 16x8 block.
 *
 * Runs variance() (defined outside this chunk) with block dimensions 16 and
 * 8, stores its first output in *sse, and returns that value minus the
 * square of its second output divided by 128.
 * NOTE(review): the two outputs are presumably the sum of squared
 * differences and the sum of differences -- confirm against variance().
 */
unsigned int vp9_variance16x8_c(const uint8_t *src_ptr,
                                int source_stride,
                                const uint8_t *ref_ptr,
                                int recon_stride,
                                unsigned int *sse) {
  unsigned int var;
  int avg;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8, &var, &avg);
  *sse = var;
  /* The square is taken in unsigned arithmetic; >> 7 divides by 16 * 8. */
  return (var - (((unsigned int)avg * avg) >> 7));
}
|
|
|
|
|
|
|
|
|
2012-12-19 00:31:19 +01:00
|
|
|
/*
 * Variance metric for an 8x8 block.
 *
 * Runs variance() (defined outside this chunk) with block dimensions 8 and
 * 8, stores its first output in *sse, and returns that value minus the
 * square of its second output divided by 64.
 * NOTE(review): the two outputs are presumably the sum of squared
 * differences and the sum of differences -- confirm against variance().
 */
unsigned int vp9_variance8x8_c(const uint8_t *src_ptr,
                               int source_stride,
                               const uint8_t *ref_ptr,
                               int recon_stride,
                               unsigned int *sse) {
  unsigned int var;
  int avg;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8, &var, &avg);
  *sse = var;
  /* The square is taken in unsigned arithmetic; >> 6 divides by 8 * 8. */
  return (var - (((unsigned int)avg * avg) >> 6));
}
|
|
|
|
|
2012-12-19 00:31:19 +01:00
|
|
|
/*
 * Variance metric for a 4x4 block.
 *
 * Runs variance() (defined outside this chunk) with block dimensions 4 and
 * 4, stores its first output in *sse, and returns that value minus the
 * square of its second output divided by 16.
 * NOTE(review): the two outputs are presumably the sum of squared
 * differences and the sum of differences -- confirm against variance().
 */
unsigned int vp9_variance4x4_c(const uint8_t *src_ptr,
                               int source_stride,
                               const uint8_t *ref_ptr,
                               int recon_stride,
                               unsigned int *sse) {
  unsigned int var;
  int avg;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4, &var, &avg);
  *sse = var;
  /* The square is taken in unsigned arithmetic; >> 4 divides by 4 * 4. */
  return (var - (((unsigned int)avg * avg) >> 4));
}
|
|
|
|
|
|
|
|
|
2012-12-19 00:31:19 +01:00
|
|
|
/*
 * Squared-error metric for a 16x16 block.
 *
 * Runs variance() (defined outside this chunk) with block dimensions 16 and
 * 16, stores its first output in *sse and returns that same value. The
 * second output of variance() is received but not used.
 */
unsigned int vp9_mse16x16_c(const uint8_t *src_ptr,
                            int source_stride,
                            const uint8_t *ref_ptr,
                            int recon_stride,
                            unsigned int *sse) {
  unsigned int var;
  int avg;  /* Required by variance()'s signature; ignored here. */

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16, &var, &avg);
  *sse = var;
  return var;
}
|
|
|
|
|
|
|
|
|
2012-12-19 00:31:19 +01:00
|
|
|
/*
 * Sub-pixel variance for a 4x4 block.
 *
 * Selects 2-tap bilinear filters for xoffset and yoffset, filters the source
 * horizontally into a 16-bit intermediate buffer (5 rows of 4), filters that
 * vertically into an 8-bit 4x4 block, and returns vp9_variance4x4_c() of the
 * filtered block (stride 4) against dst_ptr. *sse is written by
 * vp9_variance4x4_c().
 */
unsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr,
                                         int src_pixels_per_line,
                                         int xoffset,
                                         int yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  uint8_t temp2[20 * 16];
  const int16_t *HFilter, *VFilter;
  uint16_t FData3[5 * 4];  // Temp data buffer used in filtering

  HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
  VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);

  // First filter 1d Horizontal
  var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line,
                                    1, 5, 4, HFilter);

  // Now filter Vertically
  var_filter_block2d_bil_second_pass(FData3, temp2, 4, 4, 4, 4, VFilter);

  return vp9_variance4x4_c(temp2, 4, dst_ptr, dst_pixels_per_line, sse);
}
|
|
|
|
|
|
|
|
|
2012-12-19 00:31:19 +01:00
|
|
|
/*
 * Sub-pixel variance for an 8x8 block.
 *
 * Selects 2-tap bilinear filters for xoffset and yoffset, runs the first
 * pass with the xoffset filter into a 16-bit intermediate buffer (9 rows of
 * 8), runs the second pass with the yoffset filter into an 8-bit 8x8 block,
 * and returns vp9_variance8x8_c() of the filtered block (stride 8) against
 * dst_ptr. *sse is written by vp9_variance8x8_c().
 */
unsigned int vp9_sub_pixel_variance8x8_c(const uint8_t *src_ptr,
                                         int src_pixels_per_line,
                                         int xoffset,
                                         int yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  uint16_t FData3[9 * 8];  // Temp data buffer used in filtering
  uint8_t temp2[20 * 16];
  const int16_t *HFilter, *VFilter;

  HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
  VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line,
                                    1, 9, 8, HFilter);
  var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 8, 8, VFilter);

  return vp9_variance8x8_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
}
|
|
|
|
|
2012-12-19 00:31:19 +01:00
|
|
|
/*
 * Sub-pixel variance for a 16x16 block.
 *
 * Selects 2-tap bilinear filters for xoffset and yoffset, runs the first
 * pass with the xoffset filter into a 16-bit intermediate buffer (17 rows of
 * 16), runs the second pass with the yoffset filter into an 8-bit 16x16
 * block, and returns vp9_variance16x16_c() of the filtered block (stride 16)
 * against dst_ptr. *sse is written by vp9_variance16x16_c().
 */
unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset,
                                           int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t FData3[17 * 16];  // Temp data buffer used in filtering
  uint8_t temp2[20 * 16];
  const int16_t *HFilter, *VFilter;

  HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
  VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line,
                                    1, 17, 16, HFilter);
  var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 16, 16, VFilter);

  return vp9_variance16x16_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
}
|
|
|
|
|
2013-01-06 03:20:25 +01:00
|
|
|
/*
 * Sub-pixel variance for a 64x64 block.
 *
 * Selects 2-tap bilinear filters for xoffset and yoffset, runs the first
 * pass with the xoffset filter into a 16-bit intermediate buffer (65 rows of
 * 64), runs the second pass with the yoffset filter into an 8-bit 64x64
 * block, and returns vp9_variance64x64_c() of the filtered block (stride 64)
 * against dst_ptr. *sse is written by vp9_variance64x64_c().
 */
unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset,
                                           int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t FData3[65 * 64];  // Temp data buffer used in filtering
  uint8_t temp2[68 * 64];
  const int16_t *HFilter, *VFilter;

  HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
  VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line,
                                    1, 65, 64, HFilter);
  var_filter_block2d_bil_second_pass(FData3, temp2, 64, 64, 64, 64, VFilter);

  return vp9_variance64x64_c(temp2, 64, dst_ptr, dst_pixels_per_line, sse);
}
|
|
|
|
|
2012-12-19 00:31:19 +01:00
|
|
|
/*
 * Sub-pixel variance for a 32x32 block.
 *
 * Selects 2-tap bilinear filters for xoffset and yoffset, runs the first
 * pass with the xoffset filter into a 16-bit intermediate buffer (33 rows of
 * 32), runs the second pass with the yoffset filter into an 8-bit 32x32
 * block, and returns vp9_variance32x32_c() of the filtered block (stride 32)
 * against dst_ptr. *sse is written by vp9_variance32x32_c().
 */
unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset,
                                           int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  uint16_t FData3[33 * 32];  // Temp data buffer used in filtering
  uint8_t temp2[36 * 32];
  const int16_t *HFilter, *VFilter;

  HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
  VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line,
                                    1, 33, 32, HFilter);
  var_filter_block2d_bil_second_pass(FData3, temp2, 32, 32, 32, 32, VFilter);

  return vp9_variance32x32_c(temp2, 32, dst_ptr, dst_pixels_per_line, sse);
}
|
2010-10-26 21:34:16 +02:00
|
|
|
|
2012-12-19 00:31:19 +01:00
|
|
|
/*
 * 16x16 variance with a fixed horizontal offset.
 *
 * Forwards to vp9_sub_pixel_variance16x16_c() with xoffset = 8 and
 * yoffset = 0; returns its result and lets it write *sse.
 * NOTE(review): 8 presumably selects the half-pel bilinear filter -- confirm
 * against the filter table.
 */
unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 0,
                                       ref_ptr, recon_stride, sse);
}
|
|
|
|
|
2012-12-19 00:31:19 +01:00
|
|
|
/*
 * 32x32 variance with a fixed horizontal offset.
 *
 * Forwards to vp9_sub_pixel_variance32x32_c() with xoffset = 8 and
 * yoffset = 0; returns its result and lets it write *sse.
 * NOTE(review): 8 presumably selects the half-pel bilinear filter -- confirm
 * against the filter table.
 */
unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 0,
                                       ref_ptr, recon_stride, sse);
}
|
2013-01-06 03:20:25 +01:00
|
|
|
|
|
|
|
/*
 * 64x64 variance with a fixed horizontal offset.
 *
 * Forwards to vp9_sub_pixel_variance64x64_c() with xoffset = 8 and
 * yoffset = 0; returns its result and lets it write *sse.
 * NOTE(review): 8 presumably selects the half-pel bilinear filter -- confirm
 * against the filter table.
 */
unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 0,
                                       ref_ptr, recon_stride, sse);
}
|
2010-10-27 17:28:43 +02:00
|
|
|
|
2012-12-19 00:31:19 +01:00
|
|
|
/*
 * 16x16 variance with a fixed vertical offset.
 *
 * Forwards to vp9_sub_pixel_variance16x16_c() with xoffset = 0 and
 * yoffset = 8; returns its result and lets it write *sse.
 * NOTE(review): 8 presumably selects the half-pel bilinear filter -- confirm
 * against the filter table.
 */
unsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 0, 8,
                                       ref_ptr, recon_stride, sse);
}
|
|
|
|
|
2012-12-19 00:31:19 +01:00
|
|
|
/*
 * 32x32 variance with a fixed vertical offset.
 *
 * Forwards to vp9_sub_pixel_variance32x32_c() with xoffset = 0 and
 * yoffset = 8; returns its result and lets it write *sse.
 * NOTE(review): 8 presumably selects the half-pel bilinear filter -- confirm
 * against the filter table.
 */
unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 0, 8,
                                       ref_ptr, recon_stride, sse);
}
|
2013-01-06 03:20:25 +01:00
|
|
|
|
|
|
|
/*
 * 64x64 variance with a fixed vertical offset.
 *
 * Forwards to vp9_sub_pixel_variance64x64_c() with xoffset = 0 and
 * yoffset = 8; returns its result and lets it write *sse.
 * NOTE(review): 8 presumably selects the half-pel bilinear filter -- confirm
 * against the filter table.
 */
unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 0, 8,
                                       ref_ptr, recon_stride, sse);
}
|
2010-10-27 17:28:43 +02:00
|
|
|
|
2012-12-19 00:31:19 +01:00
|
|
|
/*
 * 16x16 variance with fixed horizontal and vertical offsets.
 *
 * Forwards to vp9_sub_pixel_variance16x16_c() with xoffset = 8 and
 * yoffset = 8; returns its result and lets it write *sse.
 * NOTE(review): 8 presumably selects the half-pel bilinear filter -- confirm
 * against the filter table.
 */
unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr,
                                               int source_stride,
                                               const uint8_t *ref_ptr,
                                               int recon_stride,
                                               unsigned int *sse) {
  return vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 8,
                                       ref_ptr, recon_stride, sse);
}
|
|
|
|
|
2012-12-19 00:31:19 +01:00
|
|
|
/*
 * 32x32 variance with fixed horizontal and vertical offsets.
 *
 * Forwards to vp9_sub_pixel_variance32x32_c() with xoffset = 8 and
 * yoffset = 8; returns its result and lets it write *sse.
 * NOTE(review): 8 presumably selects the half-pel bilinear filter -- confirm
 * against the filter table.
 */
unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr,
                                               int source_stride,
                                               const uint8_t *ref_ptr,
                                               int recon_stride,
                                               unsigned int *sse) {
  return vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 8,
                                       ref_ptr, recon_stride, sse);
}
|
2013-01-06 03:20:25 +01:00
|
|
|
|
|
|
|
/*
 * 64x64 variance with fixed horizontal and vertical offsets.
 *
 * Forwards to vp9_sub_pixel_variance64x64_c() with xoffset = 8 and
 * yoffset = 8; returns its result and lets it write *sse.
 * NOTE(review): 8 presumably selects the half-pel bilinear filter -- confirm
 * against the filter table.
 */
unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr,
                                               int source_stride,
                                               const uint8_t *ref_ptr,
                                               int recon_stride,
                                               unsigned int *sse) {
  return vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 8,
                                       ref_ptr, recon_stride, sse);
}
|
2010-10-26 21:34:16 +02:00
|
|
|
|
2012-12-19 00:31:19 +01:00
|
|
|
/*
 * Sub-pixel squared-error metric for a 16x16 block.
 *
 * Calls vp9_sub_pixel_variance16x16_c() with the same arguments, discards
 * its return value, and returns the value it stored in *sse.
 */
unsigned int vp9_sub_pixel_mse16x16_c(const uint8_t *src_ptr,
                                      int src_pixels_per_line,
                                      int xoffset,
                                      int yoffset,
                                      const uint8_t *dst_ptr,
                                      int dst_pixels_per_line,
                                      unsigned int *sse) {
  /* Called only for its side effect of filling *sse. */
  vp9_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line,
                                xoffset, yoffset, dst_ptr,
                                dst_pixels_per_line, sse);
  return *sse;
}
|
|
|
|
|
2012-12-19 00:31:19 +01:00
|
|
|
/*
 * Sub-pixel squared-error metric for a 32x32 block.
 *
 * Calls vp9_sub_pixel_variance32x32_c() with the same arguments, discards
 * its return value, and returns the value it stored in *sse.
 */
unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr,
                                      int src_pixels_per_line,
                                      int xoffset,
                                      int yoffset,
                                      const uint8_t *dst_ptr,
                                      int dst_pixels_per_line,
                                      unsigned int *sse) {
  /* Called only for its side effect of filling *sse. */
  vp9_sub_pixel_variance32x32_c(src_ptr, src_pixels_per_line,
                                xoffset, yoffset, dst_ptr,
                                dst_pixels_per_line, sse);
  return *sse;
}
|
2013-01-06 03:20:25 +01:00
|
|
|
|
|
|
|
/*
 * Sub-pixel squared-error metric for a 64x64 block.
 *
 * Calls vp9_sub_pixel_variance64x64_c() with the same arguments, discards
 * its return value, and returns the value it stored in *sse.
 */
unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr,
                                      int src_pixels_per_line,
                                      int xoffset,
                                      int yoffset,
                                      const uint8_t *dst_ptr,
                                      int dst_pixels_per_line,
                                      unsigned int *sse) {
  /* Called only for its side effect of filling *sse. */
  vp9_sub_pixel_variance64x64_c(src_ptr, src_pixels_per_line,
                                xoffset, yoffset, dst_ptr,
                                dst_pixels_per_line, sse);
  return *sse;
}
|
2012-08-20 23:43:34 +02:00
|
|
|
|
2012-12-19 00:31:19 +01:00
|
|
|
/*
 * Sub-pixel variance for a 16x8 block.
 *
 * Selects 2-tap bilinear filters for xoffset and yoffset, runs the first
 * pass with the xoffset filter into a 16-bit intermediate buffer (9 rows of
 * 16), runs the second pass with the yoffset filter into an 8-bit buffer
 * (8 rows of 16), and returns vp9_variance16x8_c() of the filtered block
 * (stride 16) against dst_ptr. *sse is written by vp9_variance16x8_c().
 */
unsigned int vp9_sub_pixel_variance16x8_c(const uint8_t *src_ptr,
                                          int src_pixels_per_line,
                                          int xoffset,
                                          int yoffset,
                                          const uint8_t *dst_ptr,
                                          int dst_pixels_per_line,
                                          unsigned int *sse) {
  uint16_t FData3[16 * 9];  // Temp data buffer used in filtering
  uint8_t temp2[20 * 16];
  const int16_t *HFilter, *VFilter;

  HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
  VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line,
                                    1, 9, 16, HFilter);
  var_filter_block2d_bil_second_pass(FData3, temp2, 16, 16, 8, 16, VFilter);

  return vp9_variance16x8_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
}
|
|
|
|
|
2012-12-19 00:31:19 +01:00
|
|
|
/*
 * Sub-pixel variance for an 8x16 block.
 *
 * Selects 2-tap bilinear filters for xoffset and yoffset, runs the first
 * pass with the xoffset filter into a 16-bit intermediate buffer (17 rows of
 * 8; the buffer holds 9 * 16 = 144 entries, of which 136 are needed), runs
 * the second pass with the yoffset filter into an 8-bit buffer (16 rows of
 * 8), and returns vp9_variance8x16_c() of the filtered block (stride 8)
 * against dst_ptr. *sse is written by vp9_variance8x16_c().
 */
unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr,
                                          int src_pixels_per_line,
                                          int xoffset,
                                          int yoffset,
                                          const uint8_t *dst_ptr,
                                          int dst_pixels_per_line,
                                          unsigned int *sse) {
  uint16_t FData3[9 * 16];  // Temp data buffer used in filtering
  uint8_t temp2[20 * 16];
  const int16_t *HFilter, *VFilter;

  HFilter = VP9_BILINEAR_FILTERS_2TAP(xoffset);
  VFilter = VP9_BILINEAR_FILTERS_2TAP(yoffset);

  var_filter_block2d_bil_first_pass(src_ptr, FData3, src_pixels_per_line,
                                    1, 17, 8, HFilter);
  var_filter_block2d_bil_second_pass(FData3, temp2, 8, 8, 16, 8, VFilter);

  return vp9_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
}
|
2012-10-30 22:25:33 +01:00
|
|
|
|