Alternate rounding
Improves performance on derf by 0.89% for 10-bit internal and by 0.55% for 12-bit internal, both for 8-bit sources.

Change-Id: I181fd9fb10e2259233d67cdd7933fb3cae334afc
This commit is contained in:
@@ -72,7 +72,7 @@ void vp9_high_subtract_block_c(int rows, int cols,
|
||||
#if CONFIG_HIGH_TRANSFORMS
|
||||
diff[c] = src[c] - pred[c];
|
||||
#else
|
||||
diff[c] = ((src[c] + rnd) >> shift) - ((pred[c] + rnd) >> shift);
|
||||
diff[c] = (src[c] - pred[c] + rnd) >> shift;
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -93,7 +93,7 @@ void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
|
||||
#if CONFIG_VP9_HIGH
|
||||
if (x->e_mbd.cur_buf->flags&YV12_FLAG_HIGH) {
|
||||
vp9_high_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride,
|
||||
pd->dst.buf, pd->dst.stride, x->e_mbd.bps);
|
||||
pd->dst.buf, pd->dst.stride, x->e_mbd.bps);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
@@ -1781,16 +1781,12 @@ static int64_t high_get_sse_shift(const uint8_t *a8, int a_stride,
|
||||
unsigned int input_shift) {
|
||||
const uint16_t *a = CONVERT_TO_SHORTPTR(a8);
|
||||
const uint16_t *b = CONVERT_TO_SHORTPTR(b8);
|
||||
const int offset = (1 << (input_shift - 1));
|
||||
const int max_val = (1 << bit_depth) - 1;
|
||||
int64_t total_sse = 0;
|
||||
int x, y;
|
||||
for (y = 0; y < height; ++y) {
|
||||
for (x = 0; x < width; ++x) {
|
||||
int64_t diff;
|
||||
int16_t pix_val = (b[x] + offset);
|
||||
pix_val = pix_val > max_val ? max_val : pix_val;
|
||||
diff = (a[x] >> input_shift) - (pix_val >> input_shift);
|
||||
diff = (a[x] >> input_shift) - (b[x] >> input_shift);
|
||||
total_sse += diff * diff;
|
||||
}
|
||||
a += a_stride;
|
||||
|
@@ -67,13 +67,11 @@ void vp9_high_ssim_parms_8x8_shift_c(uint16_t *s, int sp, uint16_t *r, int rp,
|
||||
uint32_t *sum_sxr, unsigned int bps,
|
||||
unsigned int shift) {
|
||||
int i, j;
|
||||
const int offset = (1 << (shift - 1));
|
||||
const int max_val = (1 << bps) - 1;
|
||||
for (i = 0; i < 8; i++, s += sp, r += rp) {
|
||||
for (j = 0; j < 8; j++) {
|
||||
int sj = s[j];
|
||||
int rj = r[j] + offset;
|
||||
rj = ((rj > max_val ? max_val : rj) >> shift) << shift;
|
||||
int rj = r[j];
|
||||
*sum_s += sj;
|
||||
*sum_r += rj;
|
||||
*sum_sq_s += sj * sj;
|
||||
|
6
vpxdec.c
6
vpxdec.c
@@ -522,9 +522,6 @@ static FILE *open_outfile(const char *name) {
|
||||
static void img_convert_16_to_8(vpx_image_t *dst, vpx_image_t *src,
|
||||
int output_shift) {
|
||||
int plane;
|
||||
int offset = 0;
|
||||
if (output_shift > 0)
|
||||
offset = 1 << (output_shift - 1);
|
||||
if (src->fmt != dst->fmt + VPX_IMG_FMT_HIGH ||
|
||||
dst->d_w != src->d_w || dst->d_h != src->d_h ||
|
||||
dst->x_chroma_shift != src->x_chroma_shift ||
|
||||
@@ -553,8 +550,7 @@ static void img_convert_16_to_8(vpx_image_t *dst, vpx_image_t *src,
|
||||
uint16_t *p_src = (uint16_t *)(src->planes[plane] +
|
||||
y * src->stride[plane]);
|
||||
for (x = 0; x < w; x++) {
|
||||
int pix_val = (*p_src++ + offset) >> output_shift;
|
||||
*p_dst++ = pix_val > 255 ? 255 : pix_val;
|
||||
*p_dst++ = *p_src++ >> output_shift;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
4
vpxenc.c
4
vpxenc.c
@@ -1630,6 +1630,8 @@ static float usec_to_fps(uint64_t usec, unsigned int frames) {
|
||||
#if CONFIG_VP9_HIGH
|
||||
static void img_convert_8_to_16(vpx_image_t *dst, vpx_image_t *src,
|
||||
int input_shift) {
|
||||
// Note the offset is 1 less than half
|
||||
const int offset = input_shift > 0 ? (1 << (input_shift - 1)) - 1 : 0;
|
||||
int plane;
|
||||
if (dst->fmt != src->fmt + VPX_IMG_FMT_HIGH ||
|
||||
dst->w != src->w || dst->h != src->h ||
|
||||
@@ -1659,7 +1661,7 @@ static void img_convert_8_to_16(vpx_image_t *dst, vpx_image_t *src,
|
||||
uint16_t *p_dst = (uint16_t *)(dst->planes[plane] +
|
||||
y * dst->stride[plane]);
|
||||
for (x = 0; x < w; x++) {
|
||||
*p_dst++ = *p_src++ << input_shift;
|
||||
*p_dst++ = (*p_src++ << input_shift) + offset;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user