Fix half-pixel variance RTCD functions

This patch fixes the system-dependent entries for the half-pixel
variance functions in both the RTCD and non-RTCD cases:

  - The generic C versions of these functions are now correct.
    Previously, all three variants called the HV code.

  - Wired up the ARM functions in RTCD mode.

  - Created stubs for x86 that call the optimized sub-pixel functions
    with the correct parameters, rather than falling back to the
    generic C code.

Change-Id: I1d937d074d929e0eb93aacb1232cc5e0ad1c6184
Author: John Koleszar
Date:   2010-10-27 11:28:43 -04:00
parent 24c86055c3
commit a0ae3682aa
8 changed files with 181 additions and 4 deletions
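
A note on the offsets used by the stubs throughout this patch: the
vp8_sub_pixel_variance* helpers take x/y offsets in eighth-pel units, so
offset 4 selects the half-pel point, and the three half-pixel variants
reduce to the offset pairs (4, 0) for horizontal, (0, 4) for vertical,
and (4, 4) for both. Below is a minimal, self-contained sketch of that
mapping, assuming the eighth-pel convention and a 7-bit bilinear kernel;
the function is illustrative and not part of the patch.

    #include <stdio.h>

    /* Illustrative only: offset is in eighth-pel units (0..7); under the
     * assumed 7-bit kernel the two bilinear taps are (128 - 16*offset,
     * 16*offset), so offset 4 gives {64, 64} -- a plain average of the
     * two neighboring pixels. */
    static unsigned char bilinear_tap(unsigned char a, unsigned char b,
                                      int offset)
    {
        const int tap0 = 128 - 16 * offset;
        const int tap1 = 16 * offset;
        return (unsigned char)((tap0 * a + tap1 * b + 64) >> 7);
    }

    int main(void)
    {
        /* Half-pel offset pairs used by the fixed stubs:
         * H = (4, 0), V = (0, 4), HV = (4, 4). */
        printf("half-pel(10, 20) = %u\n", bilinear_tap(10, 20, 4));
        return 0;
    }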


@@ -93,6 +93,9 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
         /*cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c;
         cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c;*/
         cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_neon;
+        cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_neon;
+        cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_neon;
+        cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_neon;
         cpi->rtcd.variance.mse16x16 = vp8_mse16x16_neon;
         /*cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;*/


@@ -57,6 +57,9 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
     cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c;
     cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c;
     cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_c;
+    cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_c;
+    cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_c;
+    cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_c;
     cpi->rtcd.variance.subpixmse16x16 = vp8_sub_pixel_mse16x16_c;
     cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c;


@@ -220,17 +220,17 @@ extern prototype_subpixvariance(vp8_variance_subpixvar16x8);
 extern prototype_subpixvariance(vp8_variance_subpixvar16x16);

 #ifndef vp8_variance_halfpixvar16x16_h
-#define vp8_variance_halfpixvar16x16_h vp8_half_pixel_variance16x16_c
+#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_c
 #endif
 extern prototype_variance(vp8_variance_halfpixvar16x16_h);

 #ifndef vp8_variance_halfpixvar16x16_v
-#define vp8_variance_halfpixvar16x16_v vp8_half_pixel_variance16x16_c
+#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_c
 #endif
 extern prototype_variance(vp8_variance_halfpixvar16x16_v);

 #ifndef vp8_variance_halfpixvar16x16_hv
-#define vp8_variance_halfpixvar16x16_hv vp8_half_pixel_variance16x16_c
+#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_c
 #endif
 extern prototype_variance(vp8_variance_halfpixvar16x16_hv);
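
The #ifndef blocks above implement the non-RTCD, compile-time binding:
an architecture header included earlier may already have #defined each
name to an optimized version, and only names still unbound fall back to
the generic C symbol. A stripped-down sketch of the pattern, using
hypothetical names rather than the real libvpx macros:

    /* Arch header runs first and claims the name... */
    #if HAVE_SSE2
    #undef  my_halfpixvar16x16_h
    #define my_halfpixvar16x16_h my_halfpixvar16x16_h_sse2
    #endif

    /* ...then the common header supplies the generic C default. */
    #ifndef my_halfpixvar16x16_h
    #define my_halfpixvar16x16_h my_halfpixvar16x16_h_c
    #endif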


@@ -461,7 +461,31 @@ unsigned int vp8_sub_pixel_variance16x16_c
 }
-unsigned int vp8_half_pixel_variance16x16_c(
+unsigned int vp8_variance_halfpixvar16x16_h_c(
     const unsigned char *src_ptr,
     int source_stride,
     const unsigned char *ref_ptr,
     int recon_stride,
     unsigned int *sse)
+{
+    return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 4, 0,
+                                         ref_ptr, recon_stride, sse);
+}
+
+unsigned int vp8_variance_halfpixvar16x16_v_c(
+    const unsigned char *src_ptr,
+    int source_stride,
+    const unsigned char *ref_ptr,
+    int recon_stride,
+    unsigned int *sse)
+{
+    return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 0, 4,
+                                         ref_ptr, recon_stride, sse);
+}
+
+unsigned int vp8_variance_halfpixvar16x16_hv_c(
+    const unsigned char *src_ptr,
+    int source_stride,
+    const unsigned char *ref_ptr,
+    int recon_stride,
+    unsigned int *sse)


@@ -595,3 +595,39 @@ unsigned int vp8_i_sub_pixel_variance8x16_mmx
     *sse = xxsum0;
     return (xxsum0 - ((xsum0 * xsum0) >> 7));
 }
+
+unsigned int vp8_variance_halfpixvar16x16_h_mmx(
+    const unsigned char *src_ptr,
+    int source_stride,
+    const unsigned char *ref_ptr,
+    int recon_stride,
+    unsigned int *sse)
+{
+    return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 4, 0,
+                                           ref_ptr, recon_stride, sse);
+}
+
+unsigned int vp8_variance_halfpixvar16x16_v_mmx(
+    const unsigned char *src_ptr,
+    int source_stride,
+    const unsigned char *ref_ptr,
+    int recon_stride,
+    unsigned int *sse)
+{
+    return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 0, 4,
+                                           ref_ptr, recon_stride, sse);
+}
+
+unsigned int vp8_variance_halfpixvar16x16_hv_mmx(
+    const unsigned char *src_ptr,
+    int source_stride,
+    const unsigned char *ref_ptr,
+    int recon_stride,
+    unsigned int *sse)
+{
+    return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 4, 4,
+                                           ref_ptr, recon_stride, sse);
+}


@@ -513,3 +513,84 @@ unsigned int vp8_i_sub_pixel_variance8x16_wmt
     return vp8_sub_pixel_variance8x16_wmt(src_ptr, (src_pixels_per_line >> 1), xoffset, yoffset, dst_ptr, (dst_pixels_per_line >> 1), sse);
 }
+
+unsigned int vp8_variance_halfpixvar16x16_h_wmt(
+    const unsigned char *src_ptr,
+    int src_pixels_per_line,
+    const unsigned char *dst_ptr,
+    int dst_pixels_per_line,
+    unsigned int *sse)
+{
+    int xsum0, xsum1;
+    unsigned int xxsum0, xxsum1;
+
+    vp8_half_horiz_variance16x_h_sse2(
+        src_ptr, src_pixels_per_line,
+        dst_ptr, dst_pixels_per_line, 16,
+        &xsum0, &xxsum0);
+
+    vp8_half_horiz_variance16x_h_sse2(
+        src_ptr + 8, src_pixels_per_line,
+        dst_ptr + 8, dst_pixels_per_line, 16,
+        &xsum1, &xxsum1);
+
+    xsum0 += xsum1;
+    xxsum0 += xxsum1;
+    *sse = xxsum0;
+    return (xxsum0 - ((xsum0 * xsum0) >> 8));
+}
+
+unsigned int vp8_variance_halfpixvar16x16_v_wmt(
+    const unsigned char *src_ptr,
+    int src_pixels_per_line,
+    const unsigned char *dst_ptr,
+    int dst_pixels_per_line,
+    unsigned int *sse)
+{
+    int xsum0, xsum1;
+    unsigned int xxsum0, xxsum1;
+
+    vp8_half_vert_variance16x_h_sse2(
+        src_ptr, src_pixels_per_line,
+        dst_ptr, dst_pixels_per_line, 16,
+        &xsum0, &xxsum0);
+
+    vp8_half_vert_variance16x_h_sse2(
+        src_ptr + 8, src_pixels_per_line,
+        dst_ptr + 8, dst_pixels_per_line, 16,
+        &xsum1, &xxsum1);
+
+    xsum0 += xsum1;
+    xxsum0 += xxsum1;
+    *sse = xxsum0;
+    return (xxsum0 - ((xsum0 * xsum0) >> 8));
+}
+
+unsigned int vp8_variance_halfpixvar16x16_hv_wmt(
+    const unsigned char *src_ptr,
+    int src_pixels_per_line,
+    const unsigned char *dst_ptr,
+    int dst_pixels_per_line,
+    unsigned int *sse)
+{
+    int xsum0, xsum1;
+    unsigned int xxsum0, xxsum1;
+
+    vp8_half_horiz_vert_variance16x_h_sse2(
+        src_ptr, src_pixels_per_line,
+        dst_ptr, dst_pixels_per_line, 16,
+        &xsum0, &xxsum0);
+
+    vp8_half_horiz_vert_variance16x_h_sse2(
+        src_ptr + 8, src_pixels_per_line,
+        dst_ptr + 8, dst_pixels_per_line, 16,
+        &xsum1, &xxsum1);
+
+    xsum0 += xsum1;
+    xxsum0 += xxsum1;
+    *sse = xxsum0;
+    return (xxsum0 - ((xsum0 * xsum0) >> 8));
+}
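
A note on the return expression shared by these stubs: for an N-pixel
block they compute the usual variance identity var = SSE - (sum^2 / N)
over the pixel differences, with the division done by shift. A 16x16
block has N = 256, hence the ">> 8" here; the 128-pixel 8x16 variant
earlier in this patch uses ">> 7" for the same reason.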


@@ -35,6 +35,9 @@ extern prototype_subpixvariance(vp8_sub_pixel_variance8x8_mmx);
 extern prototype_subpixvariance(vp8_sub_pixel_variance8x16_mmx);
 extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_mmx);
 extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_mmx);
+extern prototype_variance(vp8_variance_halfpixvar16x16_h_mmx);
+extern prototype_variance(vp8_variance_halfpixvar16x16_v_mmx);
+extern prototype_variance(vp8_variance_halfpixvar16x16_hv_mmx);
 extern prototype_subpixvariance(vp8_sub_pixel_mse16x16_mmx);
 extern prototype_getmbss(vp8_get_mb_ss_mmx);
 extern prototype_variance(vp8_mse16x16_mmx);

@@ -89,6 +92,15 @@ extern prototype_sad(vp8_get4x4sse_cs_mmx);
 #undef vp8_variance_subpixvar16x16
 #define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_mmx

+#undef vp8_variance_halfpixvar16x16_h
+#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_mmx
+
+#undef vp8_variance_halfpixvar16x16_v
+#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_mmx
+
+#undef vp8_variance_halfpixvar16x16_hv
+#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_mmx
+
 #undef vp8_variance_subpixmse16x16
 #define vp8_variance_subpixmse16x16 vp8_sub_pixel_mse16x16_mmx

@@ -130,6 +142,9 @@ extern prototype_subpixvariance(vp8_sub_pixel_variance8x8_wmt);
 extern prototype_subpixvariance(vp8_sub_pixel_variance8x16_wmt);
 extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_wmt);
 extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_wmt);
+extern prototype_variance(vp8_variance_halfpixvar16x16_h_wmt);
+extern prototype_variance(vp8_variance_halfpixvar16x16_v_wmt);
+extern prototype_variance(vp8_variance_halfpixvar16x16_hv_wmt);
 extern prototype_subpixvariance(vp8_sub_pixel_mse16x16_wmt);
 extern prototype_getmbss(vp8_get_mb_ss_sse2);
 extern prototype_variance(vp8_mse16x16_wmt);

@@ -183,6 +198,15 @@ extern prototype_variance2(vp8_get16x16var_sse2);
 #undef vp8_variance_subpixvar16x16
 #define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_wmt

+#undef vp8_variance_halfpixvar16x16_h
+#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_wmt
+
+#undef vp8_variance_halfpixvar16x16_v
+#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_wmt
+
+#undef vp8_variance_halfpixvar16x16_hv
+#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_wmt
+
 #undef vp8_variance_subpixmse16x16
 #define vp8_variance_subpixmse16x16 vp8_sub_pixel_mse16x16_wmt


@@ -218,6 +218,9 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
         cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_mmx;
         cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_mmx;
         cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_mmx;
+        cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_mmx;
+        cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_mmx;
+        cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_mmx;
         cpi->rtcd.variance.subpixmse16x16 = vp8_sub_pixel_mse16x16_mmx;
         cpi->rtcd.variance.mse16x16 = vp8_mse16x16_mmx;

@@ -274,6 +277,9 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
         cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_wmt;
         cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_wmt;
         cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_wmt;
+        cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_wmt;
+        cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_wmt;
+        cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_wmt;
         cpi->rtcd.variance.subpixmse16x16 = vp8_sub_pixel_mse16x16_wmt;
         cpi->rtcd.variance.mse16x16 = vp8_mse16x16_wmt;