Fix half-pixel variance RTCD functions
This patch fixes the system dependent entries for the half-pixel variance functions in both the RTCD and non-RTCD cases:

- The generic C versions of these functions are now correct. Before, all three cases called the hv code.
- Wire up the ARM functions in RTCD mode.
- Created stubs for x86 to call the optimized sub-pixel functions with the correct parameters, rather than falling back to C code.

Change-Id: I1d937d074d929e0eb93aacb1232cc5e0ad1c6184
This commit is contained in:
parent
24c86055c3
commit
a0ae3682aa
@ -93,6 +93,9 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
|
||||
/*cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c;
|
||||
cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c;*/
|
||||
cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_neon;
|
||||
cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_neon;
|
||||
cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_neon;
|
||||
cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_neon;
|
||||
|
||||
cpi->rtcd.variance.mse16x16 = vp8_mse16x16_neon;
|
||||
/*cpi->rtcd.variance.getmbss = vp8_get_mb_ss_c;*/
|
||||
|
@ -57,6 +57,9 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
|
||||
cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_c;
|
||||
cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_c;
|
||||
cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_c;
|
||||
cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_c;
|
||||
cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_c;
|
||||
cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_c;
|
||||
cpi->rtcd.variance.subpixmse16x16 = vp8_sub_pixel_mse16x16_c;
|
||||
|
||||
cpi->rtcd.variance.mse16x16 = vp8_mse16x16_c;
|
||||
|
@ -220,17 +220,17 @@ extern prototype_subpixvariance(vp8_variance_subpixvar16x8);
|
||||
extern prototype_subpixvariance(vp8_variance_subpixvar16x16);
|
||||
|
||||
#ifndef vp8_variance_halfpixvar16x16_h
|
||||
#define vp8_variance_halfpixvar16x16_h vp8_half_pixel_variance16x16_c
|
||||
#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_c
|
||||
#endif
|
||||
extern prototype_variance(vp8_variance_halfpixvar16x16_h);
|
||||
|
||||
#ifndef vp8_variance_halfpixvar16x16_v
|
||||
#define vp8_variance_halfpixvar16x16_v vp8_half_pixel_variance16x16_c
|
||||
#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_c
|
||||
#endif
|
||||
extern prototype_variance(vp8_variance_halfpixvar16x16_v);
|
||||
|
||||
#ifndef vp8_variance_halfpixvar16x16_hv
|
||||
#define vp8_variance_halfpixvar16x16_hv vp8_half_pixel_variance16x16_c
|
||||
#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_c
|
||||
#endif
|
||||
extern prototype_variance(vp8_variance_halfpixvar16x16_hv);
|
||||
|
||||
|
@ -461,7 +461,31 @@ unsigned int vp8_sub_pixel_variance16x16_c
|
||||
}
|
||||
|
||||
|
||||
unsigned int vp8_half_pixel_variance16x16_c(
|
||||
/* 16x16 horizontal half-pixel variance, generic C version.
 * Thin wrapper over the sub-pixel variance code: xoffset 4 selects the
 * half-pel tap of the eighth-pel bilinear filter table (presumably —
 * confirm against the filter table in the sub-pixel code), yoffset 0
 * means no vertical interpolation. Writes the SSE through *sse and
 * returns the variance.
 */
unsigned int vp8_variance_halfpixvar16x16_h_c(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 4, 0,
                                         ref_ptr, recon_stride, sse);
}
|
||||
|
||||
|
||||
/* 16x16 vertical half-pixel variance, generic C version.
 * Thin wrapper over the sub-pixel variance code: xoffset 0 (no
 * horizontal interpolation), yoffset 4 = half-pel vertical offset.
 * Writes the SSE through *sse and returns the variance.
 */
unsigned int vp8_variance_halfpixvar16x16_v_c(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 0, 4,
                                         ref_ptr, recon_stride, sse);
}
|
||||
|
||||
|
||||
unsigned int vp8_variance_halfpixvar16x16_hv_c(
|
||||
const unsigned char *src_ptr,
|
||||
int source_stride,
|
||||
const unsigned char *ref_ptr,
|
||||
|
@ -595,3 +595,39 @@ unsigned int vp8_i_sub_pixel_variance8x16_mmx
|
||||
*sse = xxsum0;
|
||||
return (xxsum0 - ((xsum0 * xsum0) >> 7));
|
||||
}
|
||||
|
||||
|
||||
/* 16x16 horizontal half-pixel variance, MMX-accelerated.
 * Delegates to the optimized sub-pixel variance routine with the
 * half-pel horizontal offset (4) and no vertical offset, so the
 * MMX code path is used instead of falling back to C.
 */
unsigned int vp8_variance_halfpixvar16x16_h_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 4, 0,
                                           ref_ptr, recon_stride, sse);
}
|
||||
|
||||
|
||||
/* 16x16 vertical half-pixel variance, MMX-accelerated.
 * Delegates to the optimized sub-pixel variance routine with no
 * horizontal offset and the half-pel vertical offset (4).
 */
unsigned int vp8_variance_halfpixvar16x16_v_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 0, 4,
                                           ref_ptr, recon_stride, sse);
}
|
||||
|
||||
|
||||
/* 16x16 diagonal (horizontal+vertical) half-pixel variance, MMX.
 * Delegates to the optimized sub-pixel variance routine with the
 * half-pel offset (4) in both dimensions.
 */
unsigned int vp8_variance_halfpixvar16x16_hv_mmx(
    const unsigned char *src_ptr,
    int source_stride,
    const unsigned char *ref_ptr,
    int recon_stride,
    unsigned int *sse)
{
    return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 4, 4,
                                           ref_ptr, recon_stride, sse);
}
|
||||
|
@ -513,3 +513,84 @@ unsigned int vp8_i_sub_pixel_variance8x16_wmt
|
||||
|
||||
return vp8_sub_pixel_variance8x16_wmt(src_ptr, (src_pixels_per_line >> 1), xoffset, yoffset, dst_ptr, (dst_pixels_per_line >> 1), sse);
|
||||
}
|
||||
|
||||
|
||||
/* 16x16 horizontal half-pixel variance, SSE2 (wmt).
 *
 * Runs the optimized horizontal half-pel sum/SSE helper over the left
 * and right 8-pixel-wide halves of the block, combines the partial
 * sums, stores the SSE through *sse and returns the variance
 * (sse - mean^2, with the mean term computed as sum^2 / 256).
 */
unsigned int vp8_variance_halfpixvar16x16_h_wmt(
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse)
{
    int xsum0, xsum1;
    unsigned int xxsum0, xxsum1;

    /* Left 8 columns. */
    vp8_half_horiz_variance16x_h_sse2(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line, 16,
        &xsum0, &xxsum0);

    /* Right 8 columns. */
    vp8_half_horiz_variance16x_h_sse2(
        src_ptr + 8, src_pixels_per_line,
        dst_ptr + 8, dst_pixels_per_line, 16,
        &xsum1, &xxsum1);

    xsum0 += xsum1;
    xxsum0 += xxsum1;
    *sse = xxsum0;
    /* Widen before squaring: |xsum0| can reach 255 * 256 = 65280 and
     * its square overflows a 32-bit signed int (undefined behavior). */
    return (xxsum0 - (unsigned int)(((long long)xsum0 * xsum0) >> 8));
}
|
||||
|
||||
|
||||
/* 16x16 vertical half-pixel variance, SSE2 (wmt).
 *
 * Runs the optimized vertical half-pel sum/SSE helper over the left
 * and right 8-pixel-wide halves of the block, combines the partial
 * sums, stores the SSE through *sse and returns the variance
 * (sse - mean^2, with the mean term computed as sum^2 / 256).
 */
unsigned int vp8_variance_halfpixvar16x16_v_wmt(
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse)
{
    int xsum0, xsum1;
    unsigned int xxsum0, xxsum1;

    /* Left 8 columns. */
    vp8_half_vert_variance16x_h_sse2(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line, 16,
        &xsum0, &xxsum0);

    /* Right 8 columns. */
    vp8_half_vert_variance16x_h_sse2(
        src_ptr + 8, src_pixels_per_line,
        dst_ptr + 8, dst_pixels_per_line, 16,
        &xsum1, &xxsum1);

    xsum0 += xsum1;
    xxsum0 += xxsum1;
    *sse = xxsum0;
    /* Widen before squaring: |xsum0| can reach 255 * 256 = 65280 and
     * its square overflows a 32-bit signed int (undefined behavior). */
    return (xxsum0 - (unsigned int)(((long long)xsum0 * xsum0) >> 8));
}
|
||||
|
||||
|
||||
/* 16x16 diagonal (horizontal+vertical) half-pixel variance, SSE2 (wmt).
 *
 * Runs the optimized horiz+vert half-pel sum/SSE helper over the left
 * and right 8-pixel-wide halves of the block, combines the partial
 * sums, stores the SSE through *sse and returns the variance
 * (sse - mean^2, with the mean term computed as sum^2 / 256).
 */
unsigned int vp8_variance_halfpixvar16x16_hv_wmt(
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse)
{
    int xsum0, xsum1;
    unsigned int xxsum0, xxsum1;

    /* Left 8 columns. */
    vp8_half_horiz_vert_variance16x_h_sse2(
        src_ptr, src_pixels_per_line,
        dst_ptr, dst_pixels_per_line, 16,
        &xsum0, &xxsum0);

    /* Right 8 columns. */
    vp8_half_horiz_vert_variance16x_h_sse2(
        src_ptr + 8, src_pixels_per_line,
        dst_ptr + 8, dst_pixels_per_line, 16,
        &xsum1, &xxsum1);

    xsum0 += xsum1;
    xxsum0 += xxsum1;
    *sse = xxsum0;
    /* Widen before squaring: |xsum0| can reach 255 * 256 = 65280 and
     * its square overflows a 32-bit signed int (undefined behavior). */
    return (xxsum0 - (unsigned int)(((long long)xsum0 * xsum0) >> 8));
}
|
||||
|
@ -35,6 +35,9 @@ extern prototype_subpixvariance(vp8_sub_pixel_variance8x8_mmx);
|
||||
extern prototype_subpixvariance(vp8_sub_pixel_variance8x16_mmx);
|
||||
extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_mmx);
|
||||
extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_mmx);
|
||||
extern prototype_variance(vp8_variance_halfpixvar16x16_h_mmx);
|
||||
extern prototype_variance(vp8_variance_halfpixvar16x16_v_mmx);
|
||||
extern prototype_variance(vp8_variance_halfpixvar16x16_hv_mmx);
|
||||
extern prototype_subpixvariance(vp8_sub_pixel_mse16x16_mmx);
|
||||
extern prototype_getmbss(vp8_get_mb_ss_mmx);
|
||||
extern prototype_variance(vp8_mse16x16_mmx);
|
||||
@ -89,6 +92,15 @@ extern prototype_sad(vp8_get4x4sse_cs_mmx);
|
||||
#undef vp8_variance_subpixvar16x16
|
||||
#define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_mmx
|
||||
|
||||
#undef vp8_variance_halfpixvar16x16_h
|
||||
#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_mmx
|
||||
|
||||
#undef vp8_variance_halfpixvar16x16_v
|
||||
#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_mmx
|
||||
|
||||
#undef vp8_variance_halfpixvar16x16_hv
|
||||
#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_mmx
|
||||
|
||||
#undef vp8_variance_subpixmse16x16
|
||||
#define vp8_variance_subpixmse16x16 vp8_sub_pixel_mse16x16_mmx
|
||||
|
||||
@ -130,6 +142,9 @@ extern prototype_subpixvariance(vp8_sub_pixel_variance8x8_wmt);
|
||||
extern prototype_subpixvariance(vp8_sub_pixel_variance8x16_wmt);
|
||||
extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_wmt);
|
||||
extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_wmt);
|
||||
extern prototype_variance(vp8_variance_halfpixvar16x16_h_wmt);
|
||||
extern prototype_variance(vp8_variance_halfpixvar16x16_v_wmt);
|
||||
extern prototype_variance(vp8_variance_halfpixvar16x16_hv_wmt);
|
||||
extern prototype_subpixvariance(vp8_sub_pixel_mse16x16_wmt);
|
||||
extern prototype_getmbss(vp8_get_mb_ss_sse2);
|
||||
extern prototype_variance(vp8_mse16x16_wmt);
|
||||
@ -183,6 +198,15 @@ extern prototype_variance2(vp8_get16x16var_sse2);
|
||||
#undef vp8_variance_subpixvar16x16
|
||||
#define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_wmt
|
||||
|
||||
#undef vp8_variance_halfpixvar16x16_h
|
||||
#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_wmt
|
||||
|
||||
#undef vp8_variance_halfpixvar16x16_v
|
||||
#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_wmt
|
||||
|
||||
#undef vp8_variance_halfpixvar16x16_hv
|
||||
#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_wmt
|
||||
|
||||
#undef vp8_variance_subpixmse16x16
|
||||
#define vp8_variance_subpixmse16x16 vp8_sub_pixel_mse16x16_wmt
|
||||
|
||||
|
@ -218,6 +218,9 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
|
||||
cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_mmx;
|
||||
cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_mmx;
|
||||
cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_mmx;
|
||||
cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_mmx;
|
||||
cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_mmx;
|
||||
cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_mmx;
|
||||
cpi->rtcd.variance.subpixmse16x16 = vp8_sub_pixel_mse16x16_mmx;
|
||||
|
||||
cpi->rtcd.variance.mse16x16 = vp8_mse16x16_mmx;
|
||||
@ -274,6 +277,9 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
|
||||
cpi->rtcd.variance.subpixvar8x16 = vp8_sub_pixel_variance8x16_wmt;
|
||||
cpi->rtcd.variance.subpixvar16x8 = vp8_sub_pixel_variance16x8_wmt;
|
||||
cpi->rtcd.variance.subpixvar16x16 = vp8_sub_pixel_variance16x16_wmt;
|
||||
cpi->rtcd.variance.halfpixvar16x16_h = vp8_variance_halfpixvar16x16_h_wmt;
|
||||
cpi->rtcd.variance.halfpixvar16x16_v = vp8_variance_halfpixvar16x16_v_wmt;
|
||||
cpi->rtcd.variance.halfpixvar16x16_hv = vp8_variance_halfpixvar16x16_hv_wmt;
|
||||
cpi->rtcd.variance.subpixmse16x16 = vp8_sub_pixel_mse16x16_wmt;
|
||||
|
||||
cpi->rtcd.variance.mse16x16 = vp8_mse16x16_wmt;
|
||||
|
Loading…
x
Reference in New Issue
Block a user