diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl index e501c1048..c2d6eb406 100644 --- a/vp9/common/vp9_rtcd_defs.pl +++ b/vp9/common/vp9_rtcd_defs.pl @@ -1118,6 +1118,80 @@ specialize qw/vp9_high_variance4x8/; add_proto qw/unsigned int vp9_high_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vp9_high_variance4x4/; +add_proto qw/unsigned int vp9_high_10_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_10_variance32x16/; +add_proto qw/unsigned int vp9_high_10_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_10_variance16x32/; +add_proto qw/unsigned int vp9_high_10_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_10_variance64x32/; +add_proto qw/unsigned int vp9_high_10_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_10_variance32x64/; + +add_proto qw/unsigned int vp9_high_10_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_10_variance32x32/; + +add_proto qw/unsigned int vp9_high_10_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_10_variance64x64/; + +add_proto qw/unsigned int vp9_high_10_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_10_variance16x16/; + +add_proto qw/unsigned int vp9_high_10_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_10_variance16x8/; + +add_proto qw/unsigned int vp9_high_10_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_10_variance8x16/; + +add_proto qw/unsigned int vp9_high_10_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_10_variance8x8/; + +add_proto qw/unsigned int vp9_high_10_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_10_variance8x4/; + +add_proto qw/unsigned int vp9_high_10_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_10_variance4x8/; + +add_proto qw/unsigned int vp9_high_10_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_10_variance4x4/; + +add_proto qw/unsigned int vp9_high_12_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_12_variance32x16/; + +add_proto qw/unsigned int vp9_high_12_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_12_variance16x32/; + +add_proto qw/unsigned int vp9_high_12_variance64x32/, "const uint8_t *src_ptr, int 
source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_12_variance64x32/; + +add_proto qw/unsigned int vp9_high_12_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_12_variance32x64/; + +add_proto qw/unsigned int vp9_high_12_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_12_variance32x32/; + +add_proto qw/unsigned int vp9_high_12_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_12_variance64x64/; + +add_proto qw/unsigned int vp9_high_12_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_12_variance16x16/; + +add_proto qw/unsigned int vp9_high_12_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_12_variance16x8/; + +add_proto qw/unsigned int vp9_high_12_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_12_variance8x16/; + +add_proto qw/unsigned int vp9_high_12_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_12_variance8x8/; + +add_proto qw/unsigned int vp9_high_12_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_12_variance8x4/; + +add_proto qw/unsigned int vp9_high_12_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_12_variance4x8/; + +add_proto qw/unsigned int vp9_high_12_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_12_variance4x4/; add_proto qw/unsigned int vp9_high_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; specialize qw/vp9_high_sub_pixel_variance64x64/; @@ -1197,6 +1271,162 @@ specialize qw/vp9_high_sub_pixel_variance4x4/; add_proto qw/unsigned int vp9_high_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; specialize qw/vp9_high_sub_pixel_avg_variance4x4/; +add_proto qw/unsigned int vp9_high_10_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_10_sub_pixel_variance64x64/; + +add_proto qw/unsigned int vp9_high_10_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_high_10_sub_pixel_avg_variance64x64/; + +add_proto qw/unsigned int vp9_high_10_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_10_sub_pixel_variance32x64/; + +add_proto qw/unsigned int 
vp9_high_10_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_high_10_sub_pixel_avg_variance32x64/; + +add_proto qw/unsigned int vp9_high_10_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_10_sub_pixel_variance64x32/; + +add_proto qw/unsigned int vp9_high_10_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_high_10_sub_pixel_avg_variance64x32/; + +add_proto qw/unsigned int vp9_high_10_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_10_sub_pixel_variance32x16/; + +add_proto qw/unsigned int vp9_high_10_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_high_10_sub_pixel_avg_variance32x16/; + +add_proto qw/unsigned int vp9_high_10_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_10_sub_pixel_variance16x32/; + +add_proto qw/unsigned int vp9_high_10_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_high_10_sub_pixel_avg_variance16x32/; + +add_proto qw/unsigned int vp9_high_10_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_10_sub_pixel_variance32x32/; + +add_proto qw/unsigned int vp9_high_10_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_high_10_sub_pixel_avg_variance32x32/; + +add_proto qw/unsigned int vp9_high_10_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_10_sub_pixel_variance16x16/; + +add_proto qw/unsigned int vp9_high_10_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_high_10_sub_pixel_avg_variance16x16/; + +add_proto qw/unsigned int vp9_high_10_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_10_sub_pixel_variance8x16/; + +add_proto qw/unsigned int vp9_high_10_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_high_10_sub_pixel_avg_variance8x16/; + +add_proto qw/unsigned int vp9_high_10_sub_pixel_variance16x8/, "const uint8_t 
*src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_10_sub_pixel_variance16x8/; + +add_proto qw/unsigned int vp9_high_10_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_high_10_sub_pixel_avg_variance16x8/; + +add_proto qw/unsigned int vp9_high_10_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_10_sub_pixel_variance8x8/; + +add_proto qw/unsigned int vp9_high_10_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_high_10_sub_pixel_avg_variance8x8/; + +add_proto qw/unsigned int vp9_high_10_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_10_sub_pixel_variance8x4/; + +add_proto qw/unsigned int vp9_high_10_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_high_10_sub_pixel_avg_variance8x4/; + +add_proto qw/unsigned int vp9_high_10_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_10_sub_pixel_variance4x8/; + +add_proto qw/unsigned int vp9_high_10_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_high_10_sub_pixel_avg_variance4x8/; + +add_proto qw/unsigned int vp9_high_10_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_10_sub_pixel_variance4x4/; + +add_proto qw/unsigned int vp9_high_10_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_high_10_sub_pixel_avg_variance4x4/; + +add_proto qw/unsigned int vp9_high_12_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_12_sub_pixel_variance64x64/; + +add_proto qw/unsigned int vp9_high_12_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_high_12_sub_pixel_avg_variance64x64/; + +add_proto qw/unsigned int vp9_high_12_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_12_sub_pixel_variance32x64/; + +add_proto qw/unsigned int vp9_high_12_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const 
uint8_t *second_pred"; +specialize qw/vp9_high_12_sub_pixel_avg_variance32x64/; + +add_proto qw/unsigned int vp9_high_12_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_12_sub_pixel_variance64x32/; + +add_proto qw/unsigned int vp9_high_12_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_high_12_sub_pixel_avg_variance64x32/; + +add_proto qw/unsigned int vp9_high_12_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_12_sub_pixel_variance32x16/; + +add_proto qw/unsigned int vp9_high_12_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_high_12_sub_pixel_avg_variance32x16/; + +add_proto qw/unsigned int vp9_high_12_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_12_sub_pixel_variance16x32/; + +add_proto qw/unsigned int vp9_high_12_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_high_12_sub_pixel_avg_variance16x32/; + +add_proto qw/unsigned int vp9_high_12_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_12_sub_pixel_variance32x32/; + +add_proto qw/unsigned int vp9_high_12_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_high_12_sub_pixel_avg_variance32x32/; + +add_proto qw/unsigned int vp9_high_12_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_12_sub_pixel_variance16x16/; + +add_proto qw/unsigned int vp9_high_12_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_high_12_sub_pixel_avg_variance16x16/; + +add_proto qw/unsigned int vp9_high_12_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_12_sub_pixel_variance8x16/; + +add_proto qw/unsigned int vp9_high_12_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_high_12_sub_pixel_avg_variance8x16/; + +add_proto qw/unsigned int vp9_high_12_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_12_sub_pixel_variance16x8/; + +add_proto 
qw/unsigned int vp9_high_12_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_high_12_sub_pixel_avg_variance16x8/; + +add_proto qw/unsigned int vp9_high_12_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_12_sub_pixel_variance8x8/; + +add_proto qw/unsigned int vp9_high_12_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_high_12_sub_pixel_avg_variance8x8/; + +add_proto qw/unsigned int vp9_high_12_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_12_sub_pixel_variance8x4/; + +add_proto qw/unsigned int vp9_high_12_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_high_12_sub_pixel_avg_variance8x4/; + +add_proto qw/unsigned int vp9_high_12_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_12_sub_pixel_variance4x8/; + +add_proto qw/unsigned int vp9_high_12_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_high_12_sub_pixel_avg_variance4x8/; + +add_proto qw/unsigned int vp9_high_12_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; +specialize qw/vp9_high_12_sub_pixel_variance4x4/; + +add_proto qw/unsigned int vp9_high_12_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred"; +specialize qw/vp9_high_12_sub_pixel_avg_variance4x4/; + add_proto qw/unsigned int vp9_high_sad64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad"; specialize qw/vp9_high_sad64x64/; @@ -1275,33 +1505,6 @@ specialize qw/vp9_high_sad4x8_avg/; add_proto qw/unsigned int vp9_high_sad4x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad"; specialize qw/vp9_high_sad4x4_avg/; -add_proto qw/unsigned int vp9_high_variance_halfpixvar16x16_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_high_variance_halfpixvar16x16_h/; - -add_proto qw/unsigned int vp9_high_variance_halfpixvar16x16_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_high_variance_halfpixvar16x16_v/; - -add_proto qw/unsigned int vp9_high_variance_halfpixvar16x16_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_high_variance_halfpixvar16x16_hv/; - -add_proto qw/unsigned int 
vp9_high_variance_halfpixvar64x64_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_high_variance_halfpixvar64x64_h/; - -add_proto qw/unsigned int vp9_high_variance_halfpixvar64x64_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_high_variance_halfpixvar64x64_v/; - -add_proto qw/unsigned int vp9_high_variance_halfpixvar64x64_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_high_variance_halfpixvar64x64_hv/; - -add_proto qw/unsigned int vp9_high_variance_halfpixvar32x32_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_high_variance_halfpixvar32x32_h/; - -add_proto qw/unsigned int vp9_high_variance_halfpixvar32x32_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_high_variance_halfpixvar32x32_v/; - -add_proto qw/unsigned int vp9_high_variance_halfpixvar32x32_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_high_variance_halfpixvar32x32_hv/; - add_proto qw/void vp9_high_sad64x64x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array"; specialize qw/vp9_high_sad64x64x3/; @@ -1402,11 +1605,29 @@ specialize qw/vp9_high_mse16x8/; add_proto qw/unsigned int vp9_high_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; specialize qw/vp9_high_mse8x8/; -add_proto qw/unsigned int vp9_high_sub_pixel_mse64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_high_sub_pixel_mse64x64/; +add_proto qw/unsigned int vp9_high_10_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; +specialize qw/vp9_high_10_mse16x16/; -add_proto qw/unsigned int vp9_high_sub_pixel_mse32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; -specialize qw/vp9_high_sub_pixel_mse32x32/; +add_proto qw/unsigned int vp9_high_10_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; +specialize qw/vp9_high_10_mse8x16/; + +add_proto qw/unsigned int vp9_high_10_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; +specialize qw/vp9_high_10_mse16x8/; + +add_proto qw/unsigned int vp9_high_10_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; +specialize qw/vp9_high_10_mse8x8/; + +add_proto qw/unsigned int vp9_high_12_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; +specialize qw/vp9_high_12_mse16x16/; + +add_proto qw/unsigned int vp9_high_12_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; +specialize qw/vp9_high_12_mse8x16/; + +add_proto qw/unsigned int vp9_high_12_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; +specialize 
qw/vp9_high_12_mse16x8/; + +add_proto qw/unsigned int vp9_high_12_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse"; +specialize qw/vp9_high_12_mse8x8/; add_proto qw/unsigned int vp9_high_get_mb_ss/, "const int16_t *"; specialize qw/vp9_high_get_mb_ss/; diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 9bc11e05d..9d5f5da61 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -972,159 +972,6 @@ static void fnname##_bits12(const uint8_t *src_ptr, \ sad_array[i] >>= 4; \ } -#define MAKE_BFP_WRAPPER(fnname) \ -static unsigned int fnname##_bits8(const uint8_t *src_ptr, \ - int source_stride, \ - const uint8_t *ref_ptr, \ - int ref_stride, \ - unsigned int *sse) { \ - unsigned int val = fnname(src_ptr, source_stride, ref_ptr, ref_stride, sse); \ - return val; \ -} \ -static unsigned int fnname##_bits10(const uint8_t *src_ptr, \ - int source_stride, \ - const uint8_t *ref_ptr, \ - int ref_stride, \ - unsigned int *sse) { \ - unsigned int val = fnname(src_ptr, source_stride, ref_ptr, ref_stride, sse); \ - *sse >>= 4; \ - return val >> 4; \ -} \ -static unsigned int fnname##_bits12(const uint8_t *src_ptr, \ - int source_stride, \ - const uint8_t *ref_ptr, \ - int ref_stride, \ - unsigned int *sse) { \ - unsigned int val = fnname(src_ptr, source_stride, ref_ptr, ref_stride, sse); \ - *sse >>= 8; \ - return val >> 8; \ -} - -#define MAKE_BFP_SUB_WRAPPER(fnname) \ -static unsigned int fnname##_bits8(const uint8_t *src_ptr, \ - int source_stride, \ - int xoffset, \ - int yoffset, \ - const uint8_t *ref_ptr, \ - int ref_stride, \ - unsigned int *sse) { \ - unsigned int val = fnname(src_ptr, source_stride, xoffset, yoffset, ref_ptr, \ - ref_stride, sse); \ - return val; \ -} \ -static unsigned int fnname##_bits10(const uint8_t *src_ptr, \ - int source_stride, \ - int xoffset, \ - int yoffset, \ - const uint8_t *ref_ptr, \ - int ref_stride, \ - unsigned int *sse) { \ - unsigned int val = fnname(src_ptr, source_stride, xoffset, yoffset, ref_ptr, \ - ref_stride, sse); \ - *sse >>= 4; \ - return val >> 4; \ -} \ -static unsigned int fnname##_bits12(const uint8_t *src_ptr, \ - int source_stride, \ - int xoffset, \ - int yoffset, \ - const uint8_t *ref_ptr, \ - int ref_stride, \ - unsigned int *sse) { \ - unsigned int val = fnname(src_ptr, source_stride, xoffset, yoffset, ref_ptr, \ - ref_stride, sse); \ - *sse >>= 8; \ - return val >> 8; \ -} - -#define MAKE_BFP_SUB_AVG_WRAPPER(fnname) \ -static unsigned int fnname##_bits8(const uint8_t *src_ptr, \ - int source_stride, \ - int xoffset, \ - int yoffset, \ - const uint8_t *ref_ptr, \ - int ref_stride, \ - unsigned int *sse, \ - const uint8_t *second_pred) { \ - unsigned int val = fnname(src_ptr, source_stride, xoffset, yoffset, ref_ptr, \ - ref_stride, sse, second_pred); \ - return val; \ -} \ -static unsigned int fnname##_bits10(const uint8_t *src_ptr, \ - int source_stride, \ - int xoffset, \ - int yoffset, \ - const uint8_t *ref_ptr, \ - int ref_stride, \ - unsigned int *sse, \ - const uint8_t *second_pred) { \ - unsigned int val = fnname(src_ptr, source_stride, xoffset, yoffset, \ - ref_ptr, ref_stride, sse, second_pred); \ - *sse >>= 4; \ - return val >> 4; \ -} \ -static unsigned int fnname##_bits12(const uint8_t *src_ptr, \ - int source_stride, \ - int xoffset, \ - int yoffset, \ - const uint8_t *ref_ptr, \ - int ref_stride, \ - unsigned int *sse, \ - const uint8_t *second_pred) { \ - unsigned int val = fnname(src_ptr, source_stride, xoffset, 
yoffset, \ - ref_ptr, ref_stride, sse, second_pred); \ - *sse >>= 8; \ - return val >> 8; \ -} - -MAKE_BFP_SUB_WRAPPER(vp9_high_sub_pixel_variance32x16) -MAKE_BFP_SUB_AVG_WRAPPER(vp9_high_sub_pixel_avg_variance32x16) -MAKE_BFP_SUB_WRAPPER(vp9_high_sub_pixel_variance16x32) -MAKE_BFP_SUB_AVG_WRAPPER(vp9_high_sub_pixel_avg_variance16x32) -MAKE_BFP_SUB_WRAPPER(vp9_high_sub_pixel_variance64x32) -MAKE_BFP_SUB_AVG_WRAPPER(vp9_high_sub_pixel_avg_variance64x32) -MAKE_BFP_SUB_WRAPPER(vp9_high_sub_pixel_variance32x64) -MAKE_BFP_SUB_AVG_WRAPPER(vp9_high_sub_pixel_avg_variance32x64) -MAKE_BFP_SUB_WRAPPER(vp9_high_sub_pixel_variance32x32) -MAKE_BFP_SUB_AVG_WRAPPER(vp9_high_sub_pixel_avg_variance32x32) -MAKE_BFP_WRAPPER(vp9_high_variance_halfpixvar32x32_h) -MAKE_BFP_WRAPPER(vp9_high_variance_halfpixvar32x32_v) -MAKE_BFP_WRAPPER(vp9_high_variance_halfpixvar32x32_hv) -MAKE_BFP_SUB_WRAPPER(vp9_high_sub_pixel_variance64x64) -MAKE_BFP_SUB_AVG_WRAPPER(vp9_high_sub_pixel_avg_variance64x64) -MAKE_BFP_WRAPPER(vp9_high_variance_halfpixvar64x64_h) -MAKE_BFP_WRAPPER(vp9_high_variance_halfpixvar64x64_v) -MAKE_BFP_WRAPPER(vp9_high_variance_halfpixvar64x64_hv) -MAKE_BFP_SUB_WRAPPER(vp9_high_sub_pixel_variance16x16) -MAKE_BFP_SUB_AVG_WRAPPER(vp9_high_sub_pixel_avg_variance16x16) -MAKE_BFP_WRAPPER(vp9_high_variance_halfpixvar16x16_h) -MAKE_BFP_WRAPPER(vp9_high_variance_halfpixvar16x16_v) -MAKE_BFP_WRAPPER(vp9_high_variance_halfpixvar16x16_hv) -MAKE_BFP_SUB_WRAPPER(vp9_high_sub_pixel_variance16x8) -MAKE_BFP_WRAPPER(vp9_high_variance16x8) -MAKE_BFP_SUB_AVG_WRAPPER(vp9_high_sub_pixel_avg_variance16x8) -MAKE_BFP_SUB_WRAPPER(vp9_high_sub_pixel_variance8x16) -MAKE_BFP_SUB_AVG_WRAPPER(vp9_high_sub_pixel_avg_variance8x16) -MAKE_BFP_WRAPPER(vp9_high_variance8x16) -MAKE_BFP_WRAPPER(vp9_high_variance8x8) -MAKE_BFP_SUB_WRAPPER(vp9_high_sub_pixel_variance8x8) -MAKE_BFP_SUB_AVG_WRAPPER(vp9_high_sub_pixel_avg_variance8x8) -MAKE_BFP_WRAPPER(vp9_high_variance8x4) -MAKE_BFP_SUB_WRAPPER(vp9_high_sub_pixel_variance8x4) -MAKE_BFP_SUB_AVG_WRAPPER(vp9_high_sub_pixel_avg_variance8x4) -MAKE_BFP_WRAPPER(vp9_high_variance4x8) -MAKE_BFP_SUB_WRAPPER(vp9_high_sub_pixel_variance4x8) -MAKE_BFP_SUB_AVG_WRAPPER(vp9_high_sub_pixel_avg_variance4x8) -MAKE_BFP_WRAPPER(vp9_high_variance4x4) -MAKE_BFP_SUB_WRAPPER(vp9_high_sub_pixel_variance4x4) -MAKE_BFP_SUB_AVG_WRAPPER(vp9_high_sub_pixel_avg_variance4x4) -MAKE_BFP_WRAPPER(vp9_high_variance32x16) -MAKE_BFP_WRAPPER(vp9_high_variance16x32) -MAKE_BFP_WRAPPER(vp9_high_variance64x32) -MAKE_BFP_WRAPPER(vp9_high_variance32x64) -MAKE_BFP_WRAPPER(vp9_high_variance32x32) -MAKE_BFP_WRAPPER(vp9_high_variance64x64) -MAKE_BFP_WRAPPER(vp9_high_variance16x16) MAKE_BFP_SAD_WRAPPER(vp9_high_sad32x16) MAKE_BFP_SADAVG_WRAPPER(vp9_high_sad32x16_avg) MAKE_BFP_SAD4D_WRAPPER(vp9_high_sad32x16x4d) @@ -1188,107 +1035,107 @@ static void high_set_var_fns(VP9_COMP *const cpi) { default: HIGH_BFP(BLOCK_32X16, vp9_high_sad32x16_bits8, vp9_high_sad32x16_avg_bits8, - vp9_high_variance32x16_bits8, - vp9_high_sub_pixel_variance32x16_bits8, - vp9_high_sub_pixel_avg_variance32x16_bits8, + vp9_high_variance32x16, + vp9_high_sub_pixel_variance32x16, + vp9_high_sub_pixel_avg_variance32x16, NULL, NULL, vp9_high_sad32x16x4d_bits8) HIGH_BFP(BLOCK_16X32, vp9_high_sad16x32_bits8, vp9_high_sad16x32_avg_bits8, - vp9_high_variance16x32_bits8, - vp9_high_sub_pixel_variance16x32_bits8, - vp9_high_sub_pixel_avg_variance16x32_bits8, + vp9_high_variance16x32, + vp9_high_sub_pixel_variance16x32, + vp9_high_sub_pixel_avg_variance16x32, NULL, NULL, 
vp9_high_sad16x32x4d_bits8) HIGH_BFP(BLOCK_64X32, vp9_high_sad64x32_bits8, vp9_high_sad64x32_avg_bits8, - vp9_high_variance64x32_bits8, - vp9_high_sub_pixel_variance64x32_bits8, - vp9_high_sub_pixel_avg_variance64x32_bits8, + vp9_high_variance64x32, + vp9_high_sub_pixel_variance64x32, + vp9_high_sub_pixel_avg_variance64x32, NULL, NULL, vp9_high_sad64x32x4d_bits8) HIGH_BFP(BLOCK_32X64, vp9_high_sad32x64_bits8, vp9_high_sad32x64_avg_bits8, - vp9_high_variance32x64_bits8, - vp9_high_sub_pixel_variance32x64_bits8, - vp9_high_sub_pixel_avg_variance32x64_bits8, + vp9_high_variance32x64, + vp9_high_sub_pixel_variance32x64, + vp9_high_sub_pixel_avg_variance32x64, NULL, NULL, vp9_high_sad32x64x4d_bits8) HIGH_BFP(BLOCK_32X32, vp9_high_sad32x32_bits8, vp9_high_sad32x32_avg_bits8, - vp9_high_variance32x32_bits8, - vp9_high_sub_pixel_variance32x32_bits8, - vp9_high_sub_pixel_avg_variance32x32_bits8, + vp9_high_variance32x32, + vp9_high_sub_pixel_variance32x32, + vp9_high_sub_pixel_avg_variance32x32, vp9_high_sad32x32x3_bits8, vp9_high_sad32x32x8_bits8, vp9_high_sad32x32x4d_bits8) HIGH_BFP(BLOCK_64X64, vp9_high_sad64x64_bits8, vp9_high_sad64x64_avg_bits8, - vp9_high_variance64x64_bits8, - vp9_high_sub_pixel_variance64x64_bits8, - vp9_high_sub_pixel_avg_variance64x64_bits8, + vp9_high_variance64x64, + vp9_high_sub_pixel_variance64x64, + vp9_high_sub_pixel_avg_variance64x64, vp9_high_sad64x64x3_bits8, vp9_high_sad64x64x8_bits8, vp9_high_sad64x64x4d_bits8) HIGH_BFP(BLOCK_16X16, vp9_high_sad16x16_bits8, vp9_high_sad16x16_avg_bits8, - vp9_high_variance16x16_bits8, - vp9_high_sub_pixel_variance16x16_bits8, - vp9_high_sub_pixel_avg_variance16x16_bits8, + vp9_high_variance16x16, + vp9_high_sub_pixel_variance16x16, + vp9_high_sub_pixel_avg_variance16x16, vp9_high_sad16x16x3_bits8, vp9_high_sad16x16x8_bits8, vp9_high_sad16x16x4d_bits8) HIGH_BFP(BLOCK_16X8, vp9_high_sad16x8_bits8, vp9_high_sad16x8_avg_bits8, - vp9_high_variance16x8_bits8, - vp9_high_sub_pixel_variance16x8_bits8, - vp9_high_sub_pixel_avg_variance16x8_bits8, + vp9_high_variance16x8, + vp9_high_sub_pixel_variance16x8, + vp9_high_sub_pixel_avg_variance16x8, vp9_high_sad16x8x3_bits8, vp9_high_sad16x8x8_bits8, vp9_high_sad16x8x4d_bits8) HIGH_BFP(BLOCK_8X16, vp9_high_sad8x16_bits8, vp9_high_sad8x16_avg_bits8, - vp9_high_variance8x16_bits8, - vp9_high_sub_pixel_variance8x16_bits8, - vp9_high_sub_pixel_avg_variance8x16_bits8, + vp9_high_variance8x16, + vp9_high_sub_pixel_variance8x16, + vp9_high_sub_pixel_avg_variance8x16, vp9_high_sad8x16x3_bits8, vp9_high_sad8x16x8_bits8, vp9_high_sad8x16x4d_bits8) HIGH_BFP(BLOCK_8X8, vp9_high_sad8x8_bits8, vp9_high_sad8x8_avg_bits8, - vp9_high_variance8x8_bits8, - vp9_high_sub_pixel_variance8x8_bits8, - vp9_high_sub_pixel_avg_variance8x8_bits8, + vp9_high_variance8x8, + vp9_high_sub_pixel_variance8x8, + vp9_high_sub_pixel_avg_variance8x8, vp9_high_sad8x8x3_bits8, vp9_high_sad8x8x8_bits8, vp9_high_sad8x8x4d_bits8) HIGH_BFP(BLOCK_8X4, vp9_high_sad8x4_bits8, vp9_high_sad8x4_avg_bits8, - vp9_high_variance8x4_bits8, - vp9_high_sub_pixel_variance8x4_bits8, - vp9_high_sub_pixel_avg_variance8x4_bits8, + vp9_high_variance8x4, + vp9_high_sub_pixel_variance8x4, + vp9_high_sub_pixel_avg_variance8x4, NULL, vp9_high_sad8x4x8_bits8, vp9_high_sad8x4x4d_bits8) HIGH_BFP(BLOCK_4X8, vp9_high_sad4x8_bits8, vp9_high_sad4x8_avg_bits8, - vp9_high_variance4x8_bits8, - vp9_high_sub_pixel_variance4x8_bits8, - vp9_high_sub_pixel_avg_variance4x8_bits8, + vp9_high_variance4x8, + vp9_high_sub_pixel_variance4x8, + vp9_high_sub_pixel_avg_variance4x8, 
NULL, vp9_high_sad4x8x8_bits8, vp9_high_sad4x8x4d_bits8) HIGH_BFP(BLOCK_4X4, vp9_high_sad4x4_bits8, vp9_high_sad4x4_avg_bits8, - vp9_high_variance4x4_bits8, - vp9_high_sub_pixel_variance4x4_bits8, - vp9_high_sub_pixel_avg_variance4x4_bits8, + vp9_high_variance4x4, + vp9_high_sub_pixel_variance4x4, + vp9_high_sub_pixel_avg_variance4x4, vp9_high_sad4x4x3_bits8, vp9_high_sad4x4x8_bits8, vp9_high_sad4x4x4d_bits8) @@ -1297,107 +1144,107 @@ static void high_set_var_fns(VP9_COMP *const cpi) { case BITS_10: HIGH_BFP(BLOCK_32X16, vp9_high_sad32x16_bits10, vp9_high_sad32x16_avg_bits10, - vp9_high_variance32x16_bits10, - vp9_high_sub_pixel_variance32x16_bits10, - vp9_high_sub_pixel_avg_variance32x16_bits10, + vp9_high_10_variance32x16, + vp9_high_10_sub_pixel_variance32x16, + vp9_high_10_sub_pixel_avg_variance32x16, NULL, NULL, vp9_high_sad32x16x4d_bits10) HIGH_BFP(BLOCK_16X32, vp9_high_sad16x32_bits10, vp9_high_sad16x32_avg_bits10, - vp9_high_variance16x32_bits10, - vp9_high_sub_pixel_variance16x32_bits10, - vp9_high_sub_pixel_avg_variance16x32_bits10, + vp9_high_10_variance16x32, + vp9_high_10_sub_pixel_variance16x32, + vp9_high_10_sub_pixel_avg_variance16x32, NULL, NULL, vp9_high_sad16x32x4d_bits10) HIGH_BFP(BLOCK_64X32, vp9_high_sad64x32_bits10, vp9_high_sad64x32_avg_bits10, - vp9_high_variance64x32_bits10, - vp9_high_sub_pixel_variance64x32_bits10, - vp9_high_sub_pixel_avg_variance64x32_bits10, + vp9_high_10_variance64x32, + vp9_high_10_sub_pixel_variance64x32, + vp9_high_10_sub_pixel_avg_variance64x32, NULL, NULL, vp9_high_sad64x32x4d_bits10) HIGH_BFP(BLOCK_32X64, vp9_high_sad32x64_bits10, vp9_high_sad32x64_avg_bits10, - vp9_high_variance32x64_bits10, - vp9_high_sub_pixel_variance32x64_bits10, - vp9_high_sub_pixel_avg_variance32x64_bits10, + vp9_high_10_variance32x64, + vp9_high_10_sub_pixel_variance32x64, + vp9_high_10_sub_pixel_avg_variance32x64, NULL, NULL, vp9_high_sad32x64x4d_bits10) HIGH_BFP(BLOCK_32X32, vp9_high_sad32x32_bits10, vp9_high_sad32x32_avg_bits10, - vp9_high_variance32x32_bits10, - vp9_high_sub_pixel_variance32x32_bits10, - vp9_high_sub_pixel_avg_variance32x32_bits10, + vp9_high_10_variance32x32, + vp9_high_10_sub_pixel_variance32x32, + vp9_high_10_sub_pixel_avg_variance32x32, vp9_high_sad32x32x3_bits10, vp9_high_sad32x32x8_bits10, vp9_high_sad32x32x4d_bits10) HIGH_BFP(BLOCK_64X64, vp9_high_sad64x64_bits10, vp9_high_sad64x64_avg_bits10, - vp9_high_variance64x64_bits10, - vp9_high_sub_pixel_variance64x64_bits10, - vp9_high_sub_pixel_avg_variance64x64_bits10, + vp9_high_10_variance64x64, + vp9_high_10_sub_pixel_variance64x64, + vp9_high_10_sub_pixel_avg_variance64x64, vp9_high_sad64x64x3_bits10, vp9_high_sad64x64x8_bits10, vp9_high_sad64x64x4d_bits10) HIGH_BFP(BLOCK_16X16, vp9_high_sad16x16_bits10, vp9_high_sad16x16_avg_bits10, - vp9_high_variance16x16_bits10, - vp9_high_sub_pixel_variance16x16_bits10, - vp9_high_sub_pixel_avg_variance16x16_bits10, + vp9_high_10_variance16x16, + vp9_high_10_sub_pixel_variance16x16, + vp9_high_10_sub_pixel_avg_variance16x16, vp9_high_sad16x16x3_bits10, vp9_high_sad16x16x8_bits10, vp9_high_sad16x16x4d_bits10) HIGH_BFP(BLOCK_16X8, vp9_high_sad16x8_bits10, vp9_high_sad16x8_avg_bits10, - vp9_high_variance16x8_bits10, - vp9_high_sub_pixel_variance16x8_bits10, - vp9_high_sub_pixel_avg_variance16x8_bits10, + vp9_high_10_variance16x8, + vp9_high_10_sub_pixel_variance16x8, + vp9_high_10_sub_pixel_avg_variance16x8, vp9_high_sad16x8x3_bits10, vp9_high_sad16x8x8_bits10, vp9_high_sad16x8x4d_bits10) HIGH_BFP(BLOCK_8X16, vp9_high_sad8x16_bits10, 
vp9_high_sad8x16_avg_bits10, - vp9_high_variance8x16_bits10, - vp9_high_sub_pixel_variance8x16_bits10, - vp9_high_sub_pixel_avg_variance8x16_bits10, + vp9_high_10_variance8x16, + vp9_high_10_sub_pixel_variance8x16, + vp9_high_10_sub_pixel_avg_variance8x16, vp9_high_sad8x16x3_bits10, vp9_high_sad8x16x8_bits10, vp9_high_sad8x16x4d_bits10) HIGH_BFP(BLOCK_8X8, vp9_high_sad8x8_bits10, vp9_high_sad8x8_avg_bits10, - vp9_high_variance8x8_bits10, - vp9_high_sub_pixel_variance8x8_bits10, - vp9_high_sub_pixel_avg_variance8x8_bits10, + vp9_high_10_variance8x8, + vp9_high_10_sub_pixel_variance8x8, + vp9_high_10_sub_pixel_avg_variance8x8, vp9_high_sad8x8x3_bits10, vp9_high_sad8x8x8_bits10, vp9_high_sad8x8x4d_bits10) HIGH_BFP(BLOCK_8X4, vp9_high_sad8x4_bits10, vp9_high_sad8x4_avg_bits10, - vp9_high_variance8x4_bits10, - vp9_high_sub_pixel_variance8x4_bits10, - vp9_high_sub_pixel_avg_variance8x4_bits10, + vp9_high_10_variance8x4, + vp9_high_10_sub_pixel_variance8x4, + vp9_high_10_sub_pixel_avg_variance8x4, NULL, vp9_high_sad8x4x8_bits10, vp9_high_sad8x4x4d_bits10) HIGH_BFP(BLOCK_4X8, vp9_high_sad4x8_bits10, vp9_high_sad4x8_avg_bits10, - vp9_high_variance4x8_bits10, - vp9_high_sub_pixel_variance4x8_bits10, - vp9_high_sub_pixel_avg_variance4x8_bits10, + vp9_high_10_variance4x8, + vp9_high_10_sub_pixel_variance4x8, + vp9_high_10_sub_pixel_avg_variance4x8, NULL, vp9_high_sad4x8x8_bits10, vp9_high_sad4x8x4d_bits10) HIGH_BFP(BLOCK_4X4, vp9_high_sad4x4_bits10, vp9_high_sad4x4_avg_bits10, - vp9_high_variance4x4_bits10, - vp9_high_sub_pixel_variance4x4_bits10, - vp9_high_sub_pixel_avg_variance4x4_bits10, + vp9_high_10_variance4x4, + vp9_high_10_sub_pixel_variance4x4, + vp9_high_10_sub_pixel_avg_variance4x4, vp9_high_sad4x4x3_bits10, vp9_high_sad4x4x8_bits10, vp9_high_sad4x4x4d_bits10) @@ -1406,107 +1253,107 @@ static void high_set_var_fns(VP9_COMP *const cpi) { case BITS_12: HIGH_BFP(BLOCK_32X16, vp9_high_sad32x16_bits12, vp9_high_sad32x16_avg_bits12, - vp9_high_variance32x16_bits12, - vp9_high_sub_pixel_variance32x16_bits12, - vp9_high_sub_pixel_avg_variance32x16_bits12, + vp9_high_12_variance32x16, + vp9_high_12_sub_pixel_variance32x16, + vp9_high_12_sub_pixel_avg_variance32x16, NULL, NULL, vp9_high_sad32x16x4d_bits12) HIGH_BFP(BLOCK_16X32, vp9_high_sad16x32_bits12, vp9_high_sad16x32_avg_bits12, - vp9_high_variance16x32_bits12, - vp9_high_sub_pixel_variance16x32_bits12, - vp9_high_sub_pixel_avg_variance16x32_bits12, + vp9_high_12_variance16x32, + vp9_high_12_sub_pixel_variance16x32, + vp9_high_12_sub_pixel_avg_variance16x32, NULL, NULL, vp9_high_sad16x32x4d_bits12) HIGH_BFP(BLOCK_64X32, vp9_high_sad64x32_bits12, vp9_high_sad64x32_avg_bits12, - vp9_high_variance64x32_bits12, - vp9_high_sub_pixel_variance64x32_bits12, - vp9_high_sub_pixel_avg_variance64x32_bits12, + vp9_high_12_variance64x32, + vp9_high_12_sub_pixel_variance64x32, + vp9_high_12_sub_pixel_avg_variance64x32, NULL, NULL, vp9_high_sad64x32x4d_bits12) HIGH_BFP(BLOCK_32X64, vp9_high_sad32x64_bits12, vp9_high_sad32x64_avg_bits12, - vp9_high_variance32x64_bits12, - vp9_high_sub_pixel_variance32x64_bits12, - vp9_high_sub_pixel_avg_variance32x64_bits12, + vp9_high_12_variance32x64, + vp9_high_12_sub_pixel_variance32x64, + vp9_high_12_sub_pixel_avg_variance32x64, NULL, NULL, vp9_high_sad32x64x4d_bits12) HIGH_BFP(BLOCK_32X32, vp9_high_sad32x32_bits12, vp9_high_sad32x32_avg_bits12, - vp9_high_variance32x32_bits12, - vp9_high_sub_pixel_variance32x32_bits12, - vp9_high_sub_pixel_avg_variance32x32_bits12, + vp9_high_12_variance32x32, + 
vp9_high_12_sub_pixel_variance32x32, + vp9_high_12_sub_pixel_avg_variance32x32, vp9_high_sad32x32x3_bits12, vp9_high_sad32x32x8_bits12, vp9_high_sad32x32x4d_bits12) HIGH_BFP(BLOCK_64X64, vp9_high_sad64x64_bits12, vp9_high_sad64x64_avg_bits12, - vp9_high_variance64x64_bits12, - vp9_high_sub_pixel_variance64x64_bits12, - vp9_high_sub_pixel_avg_variance64x64_bits12, + vp9_high_12_variance64x64, + vp9_high_12_sub_pixel_variance64x64, + vp9_high_12_sub_pixel_avg_variance64x64, vp9_high_sad64x64x3_bits12, vp9_high_sad64x64x8_bits12, vp9_high_sad64x64x4d_bits12) HIGH_BFP(BLOCK_16X16, vp9_high_sad16x16_bits12, vp9_high_sad16x16_avg_bits12, - vp9_high_variance16x16_bits12, - vp9_high_sub_pixel_variance16x16_bits12, - vp9_high_sub_pixel_avg_variance16x16_bits12, + vp9_high_12_variance16x16, + vp9_high_12_sub_pixel_variance16x16, + vp9_high_12_sub_pixel_avg_variance16x16, vp9_high_sad16x16x3_bits12, vp9_high_sad16x16x8_bits12, vp9_high_sad16x16x4d_bits12) HIGH_BFP(BLOCK_16X8, vp9_high_sad16x8_bits12, vp9_high_sad16x8_avg_bits12, - vp9_high_variance16x8_bits12, - vp9_high_sub_pixel_variance16x8_bits12, - vp9_high_sub_pixel_avg_variance16x8_bits12, + vp9_high_12_variance16x8, + vp9_high_12_sub_pixel_variance16x8, + vp9_high_12_sub_pixel_avg_variance16x8, vp9_high_sad16x8x3_bits12, vp9_high_sad16x8x8_bits12, vp9_high_sad16x8x4d_bits12) HIGH_BFP(BLOCK_8X16, vp9_high_sad8x16_bits12, vp9_high_sad8x16_avg_bits12, - vp9_high_variance8x16_bits12, - vp9_high_sub_pixel_variance8x16_bits12, - vp9_high_sub_pixel_avg_variance8x16_bits12, + vp9_high_12_variance8x16, + vp9_high_12_sub_pixel_variance8x16, + vp9_high_12_sub_pixel_avg_variance8x16, vp9_high_sad8x16x3_bits12, vp9_high_sad8x16x8_bits12, vp9_high_sad8x16x4d_bits12) HIGH_BFP(BLOCK_8X8, vp9_high_sad8x8_bits12, vp9_high_sad8x8_avg_bits12, - vp9_high_variance8x8_bits12, - vp9_high_sub_pixel_variance8x8_bits12, - vp9_high_sub_pixel_avg_variance8x8_bits12, + vp9_high_12_variance8x8, + vp9_high_12_sub_pixel_variance8x8, + vp9_high_12_sub_pixel_avg_variance8x8, vp9_high_sad8x8x3_bits12, vp9_high_sad8x8x8_bits12, vp9_high_sad8x8x4d_bits12) HIGH_BFP(BLOCK_8X4, vp9_high_sad8x4_bits12, vp9_high_sad8x4_avg_bits12, - vp9_high_variance8x4_bits12, - vp9_high_sub_pixel_variance8x4_bits12, - vp9_high_sub_pixel_avg_variance8x4_bits12, + vp9_high_12_variance8x4, + vp9_high_12_sub_pixel_variance8x4, + vp9_high_12_sub_pixel_avg_variance8x4, NULL, vp9_high_sad8x4x8_bits12, vp9_high_sad8x4x4d_bits12) HIGH_BFP(BLOCK_4X8, vp9_high_sad4x8_bits12, vp9_high_sad4x8_avg_bits12, - vp9_high_variance4x8_bits12, - vp9_high_sub_pixel_variance4x8_bits12, - vp9_high_sub_pixel_avg_variance4x8_bits12, + vp9_high_12_variance4x8, + vp9_high_12_sub_pixel_variance4x8, + vp9_high_12_sub_pixel_avg_variance4x8, NULL, vp9_high_sad4x8x8_bits12, vp9_high_sad4x8x4d_bits12) HIGH_BFP(BLOCK_4X4, vp9_high_sad4x4_bits12, vp9_high_sad4x4_avg_bits12, - vp9_high_variance4x4_bits12, - vp9_high_sub_pixel_variance4x4_bits12, - vp9_high_sub_pixel_avg_variance4x4_bits12, + vp9_high_12_variance4x4, + vp9_high_12_sub_pixel_variance4x4, + vp9_high_12_sub_pixel_avg_variance4x4, vp9_high_sad4x4x3_bits12, vp9_high_sad4x4x8_bits12, vp9_high_sad4x4x4d_bits12) @@ -2784,10 +2631,19 @@ static void encode_with_recode_loop(VP9_COMP *cpi, rc->this_key_frame_forced && (rc->projected_frame_size < rc->max_frame_bandwidth)) { int last_q = q; - int kf_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm)); + int kf_err; int high_err_target = cpi->ambient_err; int low_err_target = cpi->ambient_err >> 1; +#if CONFIG_VP9_HIGH + if 
(cm->use_high) + kf_err = vp9_high_get_y_sse(cpi->Source, get_frame_new_buffer(cm), + cm->bit_depth); + else + kf_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm)); +#else + kf_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm)); +#endif // Prevent possible divide by zero error below for perfect KF kf_err += !kf_err; @@ -3141,20 +2997,15 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, // fixed interval. Note the reconstruction error if it is the frame before // the force key frame if (cpi->rc.next_key_frame_forced && cpi->rc.frames_to_key == 1) { - cpi->ambient_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm)); #if CONFIG_VP9_HIGH - if (cm->use_high) { - switch (cm->bit_depth) { - default: - break; - case BITS_10: - cpi->ambient_err >>= 4; - break; - case BITS_12: - cpi->ambient_err >>= 8; - break; - } - } + if (cm->use_high) + cpi->ambient_err = vp9_high_get_y_sse(cpi->Source, + get_frame_new_buffer(cm), + cm->bit_depth); + else + cpi->ambient_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm)); +#else + cpi->ambient_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm)); #endif } @@ -3945,17 +3796,37 @@ void vp9_set_svc(VP9_COMP *cpi, int use_svc) { int vp9_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b) { assert(a->y_crop_width == b->y_crop_width); assert(a->y_crop_height == b->y_crop_height); -#if CONFIG_VP9_HIGH - if (a->flags & YV12_FLAG_HIGH) { - return (int) high_get_sse(a->y_buffer, a->y_stride, b->y_buffer, - b->y_stride, a->y_crop_width, - a->y_crop_height); - } -#endif - return (int)get_sse(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride, a->y_crop_width, a->y_crop_height); } +#if CONFIG_VP9_HIGH +int vp9_high_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b, + BIT_DEPTH bit_depth) { + unsigned int sse; + int sum; + assert(a->y_crop_width == b->y_crop_width); + assert(a->y_crop_height == b->y_crop_height); + assert((a->flags & YV12_FLAG_HIGH) != 0); + assert((b->flags & YV12_FLAG_HIGH) != 0); + switch (bit_depth) { + default: + case BITS_8: + high_variance(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride, + a->y_crop_width, a->y_crop_height, &sse, &sum); + return (int) sse; + case BITS_10: + high_10_variance(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride, + a->y_crop_width, a->y_crop_height, &sse, &sum); + return (int) sse; + case BITS_12: + high_12_variance(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride, + a->y_crop_width, a->y_crop_height, &sse, &sum); + return (int) sse; + } + assert(0); + +} +#endif int vp9_get_quantizer(VP9_COMP *cpi) { return cpi->common.base_qindex; diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index 0da0f5502..79d890b9b 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -610,6 +610,11 @@ static INLINE int get_token_alloc(int mb_rows, int mb_cols) { int vp9_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b); +#if CONFIG_VP9_HIGH +int vp9_high_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b, + BIT_DEPTH bit_depth); +#endif + void vp9_alloc_compressor_data(VP9_COMP *cpi); void vp9_scale_references(VP9_COMP *cpi); diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index fad00f77f..02cd364bb 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -405,31 +405,6 @@ static vp9_variance_fn_t get_block_variance_fn(BLOCK_SIZE bsize) { #if CONFIG_VP9_HIGH -#define MAKE_MSE_WRAPPER(fnname) \ -static unsigned int fnname##_bits10(const uint8_t *src_ptr, 
\ - int source_stride, \ - const uint8_t *ref_ptr, \ - int ref_stride, \ - unsigned int *sse) { \ - unsigned int val = fnname(src_ptr, source_stride, ref_ptr, ref_stride, sse); \ - *sse >>= 4; \ - return val >> 4; \ -} \ -static unsigned int fnname##_bits12(const uint8_t *src_ptr, \ - int source_stride, \ - const uint8_t *ref_ptr, \ - int ref_stride, \ - unsigned int *sse) { \ - unsigned int val = fnname(src_ptr, source_stride, ref_ptr, ref_stride, sse); \ - *sse >>= 8; \ - return val >> 8; \ -} - -MAKE_MSE_WRAPPER(vp9_high_mse8x8) -MAKE_MSE_WRAPPER(vp9_high_mse16x8) -MAKE_MSE_WRAPPER(vp9_high_mse8x16) -MAKE_MSE_WRAPPER(vp9_high_mse16x16) - static vp9_variance_fn_t high_get_block_variance_fn(BLOCK_SIZE bsize, int bps) { switch (bps) { default: @@ -447,25 +422,25 @@ static vp9_variance_fn_t high_get_block_variance_fn(BLOCK_SIZE bsize, int bps) { case 10: switch (bsize) { case BLOCK_8X8: - return vp9_high_mse8x8_bits10; + return vp9_high_10_mse8x8; case BLOCK_16X8: - return vp9_high_mse16x8_bits10; + return vp9_high_10_mse16x8; case BLOCK_8X16: - return vp9_high_mse8x16_bits10; + return vp9_high_10_mse8x16; default: - return vp9_high_mse16x16_bits10; + return vp9_high_10_mse16x16; } break; case 12: switch (bsize) { case BLOCK_8X8: - return vp9_high_mse8x8_bits12; + return vp9_high_12_mse8x8; case BLOCK_16X8: - return vp9_high_mse16x8_bits12; + return vp9_high_12_mse16x8; case BLOCK_8X16: - return vp9_high_mse8x16_bits12; + return vp9_high_12_mse8x16; default: - return vp9_high_mse16x16_bits12; + return vp9_high_12_mse16x16; } break; } diff --git a/vp9/encoder/vp9_mcomp.c b/vp9/encoder/vp9_mcomp.c index 9dd5744b5..23b12ba8a 100644 --- a/vp9/encoder/vp9_mcomp.c +++ b/vp9/encoder/vp9_mcomp.c @@ -403,7 +403,8 @@ int vp9_find_best_sub_pixel_comp_tree(const MACROBLOCK *x, #if CONFIG_VP9_HIGH if (xd->cur_buf->flags & YV12_FLAG_HIGH) { - high_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset, y_stride); + vp9_high_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset, + y_stride); besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, z, src_stride, sse1); } else { vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); diff --git a/vp9/encoder/vp9_picklpf.c b/vp9/encoder/vp9_picklpf.c index 53284656e..521d0cf93 100644 --- a/vp9/encoder/vp9_picklpf.c +++ b/vp9/encoder/vp9_picklpf.c @@ -40,8 +40,15 @@ static int try_filter_frame(const YV12_BUFFER_CONFIG *sd, VP9_COMP *const cpi, vp9_loop_filter_frame(cm->frame_to_show, cm, &cpi->mb.e_mbd, filt_level, 1, partial_frame); +#if CONFIG_VP9_HIGH + if (cm->use_high) { + filt_err = vp9_high_get_y_sse(sd, cm->frame_to_show, cm->bit_depth); + } else { + filt_err = vp9_get_y_sse(sd, cm->frame_to_show); + } +#else filt_err = vp9_get_y_sse(sd, cm->frame_to_show); - +#endif // Re-instate the unfiltered frame vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show); diff --git a/vp9/encoder/vp9_sad.c b/vp9/encoder/vp9_sad.c index 184a26a13..627ffa08b 100644 --- a/vp9/encoder/vp9_sad.c +++ b/vp9/encoder/vp9_sad.c @@ -184,7 +184,7 @@ unsigned int vp9_high_sad##m##x##n##_avg_c(const uint8_t *src, int src_stride, \ const uint8_t *second_pred, \ unsigned int max_sad) { \ uint16_t comp_pred[m * n]; \ - high_comp_avg_pred(comp_pred, second_pred, m, n, ref, ref_stride); \ + vp9_high_comp_avg_pred(comp_pred, second_pred, m, n, ref, ref_stride); \ return high_sadb(src, src_stride, comp_pred, m, m, n); \ } diff --git a/vp9/encoder/vp9_variance.c b/vp9/encoder/vp9_variance.c index 91f01c017..917cc2339 100644 --- a/vp9/encoder/vp9_variance.c +++ 
b/vp9/encoder/vp9_variance.c @@ -259,7 +259,8 @@ void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width, void high_variance(const uint8_t *a8, int a_stride, const uint8_t *b8, int b_stride, - int w, int h, unsigned int *sse, int *sum) { + int w, int h, unsigned int *sse, + int *sum) { int i, j; uint16_t *a = CONVERT_TO_SHORTPTR(a8); @@ -273,12 +274,59 @@ void high_variance(const uint8_t *a8, int a_stride, *sum += diff; *sse += diff * diff; } - a += a_stride; b += b_stride; } } +void high_10_variance(const uint8_t *a8, int a_stride, + const uint8_t *b8, int b_stride, + int w, int h, unsigned int *sse, + int *sum) { + int i, j; + uint64_t sse_long = 0; + uint64_t sum_long = 0; + + uint16_t *a = CONVERT_TO_SHORTPTR(a8); + uint16_t *b = CONVERT_TO_SHORTPTR(b8); + + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + const int diff = a[j] - b[j]; + sum_long += diff; + sse_long += diff * diff; + } + a += a_stride; + b += b_stride; + } + *sum = ROUND_POWER_OF_TWO(sum_long, 2); + *sse = ROUND_POWER_OF_TWO(sse_long, 4); +} + +void high_12_variance(const uint8_t *a8, int a_stride, + const uint8_t *b8, int b_stride, + int w, int h, unsigned int *sse, + int *sum) { + int i, j; + uint64_t sse_long = 0; + uint64_t sum_long = 0; + + uint16_t *a = CONVERT_TO_SHORTPTR(a8); + uint16_t *b = CONVERT_TO_SHORTPTR(b8); + + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + const int diff = a[j] - b[j]; + sum_long += diff; + sse_long += diff * diff; + } + a += a_stride; + b += b_stride; + } + *sum = ROUND_POWER_OF_TWO(sum_long, 4); + *sse = ROUND_POWER_OF_TWO(sse_long, 8); +} + static void high_var_filter_block2d_bil_first_pass( const uint8_t *src_ptr8, uint16_t *output_ptr, @@ -336,18 +384,27 @@ unsigned int vp9_high_get_mb_ss_c(const int16_t *src_ptr) { return sum; } -unsigned int vp9_high_variance64x32_c(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, - unsigned int *sse) { - unsigned int var; - int avg; - - high_variance(src_ptr, source_stride, ref_ptr, recon_stride, - 64, 32, &var, &avg); - *sse = var; - return (var - (((int64_t)avg * avg) >> 11)); +#define HIGH_VAR(W, H) \ +unsigned int vp9_high_variance##W##x##H##_c(const uint8_t *a, int a_stride, \ + const uint8_t *b, int b_stride, \ + unsigned int *sse) { \ + int sum; \ + high_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ + return *sse - (((int64_t)sum * sum) / (W * H)); \ +} \ +unsigned int vp9_high_10_variance##W##x##H##_c(const uint8_t *a, int a_stride, \ + const uint8_t *b, int b_stride, \ + unsigned int *sse) { \ + int sum; \ + high_10_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ + return *sse - (((int64_t)sum * sum) / (W * H)); \ +} \ +unsigned int vp9_high_12_variance##W##x##H##_c(const uint8_t *a, int a_stride, \ + const uint8_t *b, int b_stride, \ + unsigned int *sse) { \ + int sum; \ + high_12_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ + return *sse - (((int64_t)sum * sum) / (W * H)); \ } #define HIGH_SUBPIX_VAR(W, H) \ @@ -366,6 +423,38 @@ unsigned int vp9_high_sub_pixel_variance##W##x##H##_c( \ \ return vp9_high_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \ dst_stride, sse); \ +} \ +unsigned int vp9_high_10_sub_pixel_variance##W##x##H##_c( \ + const uint8_t *src, int src_stride, \ + int xoffset, int yoffset, \ + const uint8_t *dst, int dst_stride, \ + unsigned int *sse) { \ + uint16_t fdata3[(H + 1) * W]; \ + uint16_t temp2[H * W]; \ +\ + high_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ + W, 
BILINEAR_FILTERS_2TAP(xoffset)); \ + high_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ + BILINEAR_FILTERS_2TAP(yoffset)); \ +\ + return vp9_high_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \ + dst_stride, sse); \ +} \ +unsigned int vp9_high_12_sub_pixel_variance##W##x##H##_c( \ + const uint8_t *src, int src_stride, \ + int xoffset, int yoffset, \ + const uint8_t *dst, int dst_stride, \ + unsigned int *sse) { \ + uint16_t fdata3[(H + 1) * W]; \ + uint16_t temp2[H * W]; \ +\ + high_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ + W, BILINEAR_FILTERS_2TAP(xoffset)); \ + high_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ + BILINEAR_FILTERS_2TAP(yoffset)); \ +\ + return vp9_high_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \ + dst_stride, sse); \ } @@ -385,394 +474,150 @@ unsigned int vp9_high_sub_pixel_avg_variance##W##x##H##_c( \ high_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ BILINEAR_FILTERS_2TAP(yoffset)); \ \ - high_comp_avg_pred(temp3, second_pred, W, H, CONVERT_TO_BYTEPTR(temp2), W); \ + vp9_high_comp_avg_pred(temp3, second_pred, W, H, CONVERT_TO_BYTEPTR(temp2), \ + W); \ \ return vp9_high_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, dst, \ dst_stride, sse); \ +} \ +unsigned int vp9_high_10_sub_pixel_avg_variance##W##x##H##_c( \ + const uint8_t *src, int src_stride, \ + int xoffset, int yoffset, \ + const uint8_t *dst, int dst_stride, \ + unsigned int *sse, \ + const uint8_t *second_pred) { \ + uint16_t fdata3[(H + 1) * W]; \ + uint16_t temp2[H * W]; \ + DECLARE_ALIGNED_ARRAY(16, uint16_t, temp3, H * W); \ +\ + high_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ + W, BILINEAR_FILTERS_2TAP(xoffset)); \ + high_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ + BILINEAR_FILTERS_2TAP(yoffset)); \ +\ + vp9_high_comp_avg_pred(temp3, second_pred, W, H, CONVERT_TO_BYTEPTR(temp2), \ + W); \ +\ + return vp9_high_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, dst, \ + dst_stride, sse); \ +} \ +unsigned int vp9_high_12_sub_pixel_avg_variance##W##x##H##_c( \ + const uint8_t *src, int src_stride, \ + int xoffset, int yoffset, \ + const uint8_t *dst, int dst_stride, \ + unsigned int *sse, \ + const uint8_t *second_pred) { \ + uint16_t fdata3[(H + 1) * W]; \ + uint16_t temp2[H * W]; \ + DECLARE_ALIGNED_ARRAY(16, uint16_t, temp3, H * W); \ +\ + high_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ + W, BILINEAR_FILTERS_2TAP(xoffset)); \ + high_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ + BILINEAR_FILTERS_2TAP(yoffset)); \ +\ + vp9_high_comp_avg_pred(temp3, second_pred, W, H, CONVERT_TO_BYTEPTR(temp2), \ + W); \ +\ + return vp9_high_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, dst, \ + dst_stride, sse); \ } -unsigned int vp9_high_variance32x64_c(const uint8_t *src_ptr, - int source_stride, - const uint8_t *ref_ptr, - int recon_stride, - unsigned int *sse) { - unsigned int var; - int avg; - - high_variance(src_ptr, source_stride, ref_ptr, recon_stride, - 32, 64, &var, &avg); - *sse = var; - return (var - (((int64_t)avg * avg) >> 11)); +#define HIGH_MSE(W, H) \ +unsigned int vp9_high_mse##W##x##H##_c(const uint8_t *src, int src_stride, \ + const uint8_t *ref, int ref_stride, \ + unsigned int *sse) { \ + int sum; \ + high_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \ + return *sse; \ +} \ +unsigned int vp9_high_10_mse##W##x##H##_c(const uint8_t *src, int src_stride, \ + 
+#define HIGH_VAR(W, H) \
+unsigned int vp9_high_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
+                                            const uint8_t *b, int b_stride, \
+                                            unsigned int *sse) { \
+  int sum; \
+  high_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
+  return *sse - (((int64_t)sum * sum) / (W * H)); \
+} \
+unsigned int vp9_high_10_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
+                                               const uint8_t *b, int b_stride, \
+                                               unsigned int *sse) { \
+  int sum; \
+  high_10_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
+  return *sse - (((int64_t)sum * sum) / (W * H)); \
+} \
+unsigned int vp9_high_12_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
+                                               const uint8_t *b, int b_stride, \
+                                               unsigned int *sse) { \
+  int sum; \
+  high_12_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
+  return *sse - (((int64_t)sum * sum) / (W * H)); \
 }
 
 #define HIGH_SUBPIX_VAR(W, H) \
@@ -366,6 +423,38 @@ unsigned int vp9_high_sub_pixel_variance##W##x##H##_c( \
 \
   return vp9_high_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \
                                         dst_stride, sse); \
+} \
+unsigned int vp9_high_10_sub_pixel_variance##W##x##H##_c( \
+    const uint8_t *src, int src_stride, \
+    int xoffset, int yoffset, \
+    const uint8_t *dst, int dst_stride, \
+    unsigned int *sse) { \
+  uint16_t fdata3[(H + 1) * W]; \
+  uint16_t temp2[H * W]; \
+\
+  high_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
+                                         W, BILINEAR_FILTERS_2TAP(xoffset)); \
+  high_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+                                          BILINEAR_FILTERS_2TAP(yoffset)); \
+\
+  return vp9_high_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \
+                                           dst_stride, sse); \
+} \
+unsigned int vp9_high_12_sub_pixel_variance##W##x##H##_c( \
+    const uint8_t *src, int src_stride, \
+    int xoffset, int yoffset, \
+    const uint8_t *dst, int dst_stride, \
+    unsigned int *sse) { \
+  uint16_t fdata3[(H + 1) * W]; \
+  uint16_t temp2[H * W]; \
+\
+  high_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
+                                         W, BILINEAR_FILTERS_2TAP(xoffset)); \
+  high_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+                                          BILINEAR_FILTERS_2TAP(yoffset)); \
+\
+  return vp9_high_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \
+                                           dst_stride, sse); \
 }
@@ -385,394 +474,150 @@ unsigned int vp9_high_sub_pixel_avg_variance##W##x##H##_c( \
   high_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
                                           BILINEAR_FILTERS_2TAP(yoffset)); \
 \
-  high_comp_avg_pred(temp3, second_pred, W, H, CONVERT_TO_BYTEPTR(temp2), W); \
+  vp9_high_comp_avg_pred(temp3, second_pred, W, H, CONVERT_TO_BYTEPTR(temp2), \
+                         W); \
 \
   return vp9_high_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, dst, \
                                         dst_stride, sse); \
+} \
+unsigned int vp9_high_10_sub_pixel_avg_variance##W##x##H##_c( \
+    const uint8_t *src, int src_stride, \
+    int xoffset, int yoffset, \
+    const uint8_t *dst, int dst_stride, \
+    unsigned int *sse, \
+    const uint8_t *second_pred) { \
+  uint16_t fdata3[(H + 1) * W]; \
+  uint16_t temp2[H * W]; \
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, temp3, H * W); \
+\
+  high_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
+                                         W, BILINEAR_FILTERS_2TAP(xoffset)); \
+  high_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+                                          BILINEAR_FILTERS_2TAP(yoffset)); \
+\
+  vp9_high_comp_avg_pred(temp3, second_pred, W, H, CONVERT_TO_BYTEPTR(temp2), \
+                         W); \
+\
+  return vp9_high_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, dst, \
+                                           dst_stride, sse); \
+} \
+unsigned int vp9_high_12_sub_pixel_avg_variance##W##x##H##_c( \
+    const uint8_t *src, int src_stride, \
+    int xoffset, int yoffset, \
+    const uint8_t *dst, int dst_stride, \
+    unsigned int *sse, \
+    const uint8_t *second_pred) { \
+  uint16_t fdata3[(H + 1) * W]; \
+  uint16_t temp2[H * W]; \
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, temp3, H * W); \
+\
+  high_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
+                                         W, BILINEAR_FILTERS_2TAP(xoffset)); \
+  high_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
+                                          BILINEAR_FILTERS_2TAP(yoffset)); \
+\
+  vp9_high_comp_avg_pred(temp3, second_pred, W, H, CONVERT_TO_BYTEPTR(temp2), \
+                         W); \
+\
+  return vp9_high_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, dst, \
+                                           dst_stride, sse); \
 }
 
-unsigned int vp9_high_variance32x64_c(const uint8_t *src_ptr,
-                                      int source_stride,
-                                      const uint8_t *ref_ptr,
-                                      int recon_stride,
-                                      unsigned int *sse) {
-  unsigned int var;
-  int avg;
-
-  high_variance(src_ptr, source_stride, ref_ptr, recon_stride,
-                32, 64, &var, &avg);
-  *sse = var;
-  return (var - (((int64_t)avg * avg) >> 11));
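+/* The MSE variants return the (bit-depth normalized) SSE directly; the sum
+ * is computed but unused. */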
+#define HIGH_MSE(W, H) \
+unsigned int vp9_high_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
+                                       const uint8_t *ref, int ref_stride, \
+                                       unsigned int *sse) { \
+  int sum; \
+  high_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \
+  return *sse; \
+} \
+unsigned int vp9_high_10_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
+                                          const uint8_t *ref, int ref_stride, \
+                                          unsigned int *sse) { \
+  int sum; \
+  high_10_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \
+  return *sse; \
+} \
+unsigned int vp9_high_12_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
+                                          const uint8_t *ref, int ref_stride, \
+                                          unsigned int *sse) { \
+  int sum; \
+  high_12_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \
+  return *sse; \
 }
 
-unsigned int vp9_high_variance32x16_c(const uint8_t *src_ptr,
-                                      int source_stride,
-                                      const uint8_t *ref_ptr,
-                                      int recon_stride,
-                                      unsigned int *sse) {
-  unsigned int var;
-  int avg;
-
-  high_variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 16,
-                &var, &avg);
-  *sse = var;
-  return (var - (((int64_t)avg * avg) >> 9));
-}
-
-unsigned int vp9_high_variance16x32_c(const uint8_t *src_ptr,
-                                      int source_stride,
-                                      const uint8_t *ref_ptr,
-                                      int recon_stride,
-                                      unsigned int *sse) {
-  unsigned int var;
-  int avg;
-
-  high_variance(src_ptr, source_stride, ref_ptr, recon_stride,
-                16, 32, &var, &avg);
-  *sse = var;
-  return (var - (((int64_t)avg * avg) >> 9));
-}
-
-unsigned int vp9_high_variance64x64_c(const uint8_t *src_ptr,
-                                      int source_stride,
-                                      const uint8_t *ref_ptr,
-                                      int recon_stride,
-                                      unsigned int *sse) {
-  unsigned int var;
-  int avg;
-
-  high_variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 64,
-                &var, &avg);
-  *sse = var;
-  return (var - (((int64_t)avg * avg) >> 12));
-}
-
-unsigned int vp9_high_variance32x32_c(const uint8_t *src_ptr,
-                                      int source_stride,
-                                      const uint8_t *ref_ptr,
-                                      int recon_stride,
-                                      unsigned int *sse) {
-  unsigned int var;
-  int avg;
-
-  high_variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 32,
-                &var, &avg);
-  *sse = var;
-  return (var - (((int64_t)avg * avg) >> 10));
-}
-
-unsigned int vp9_high_variance16x16_c(const uint8_t *src_ptr,
-                                      int source_stride,
-                                      const uint8_t *ref_ptr,
-                                      int recon_stride,
-                                      unsigned int *sse) {
-  unsigned int var;
-  int avg;
-
-  high_variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16,
-                &var, &avg);
-  *sse = var;
-  return (var - (((unsigned int)avg * avg) >> 8));
-}
-
-unsigned int vp9_high_variance8x16_c(const uint8_t *src_ptr,
-                                     int source_stride,
-                                     const uint8_t *ref_ptr,
-                                     int recon_stride,
-                                     unsigned int *sse) {
-  unsigned int var;
-  int avg;
-
-  high_variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16,
-                &var, &avg);
-  *sse = var;
-  return (var - (((unsigned int)avg * avg) >> 7));
-}
-
-unsigned int vp9_high_variance16x8_c(const uint8_t *src_ptr,
-                                     int source_stride,
-                                     const uint8_t *ref_ptr,
-                                     int recon_stride,
-                                     unsigned int *sse) {
-  unsigned int var;
-  int avg;
-
-  high_variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8,
-                &var, &avg);
-  *sse = var;
-  return (var - (((unsigned int)avg * avg) >> 7));
-}
-
-unsigned int vp9_high_variance8x8_c(const uint8_t *src_ptr,
-                                    int source_stride,
-                                    const uint8_t *ref_ptr,
-                                    int recon_stride,
-                                    unsigned int *sse) {
-  unsigned int var;
-  int avg;
-
-  high_variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8,
-                &var, &avg);
-  *sse = var;
-  return (var - (((unsigned int)avg * avg) >> 6));
-}
-
-unsigned int vp9_high_variance8x4_c(const uint8_t *src_ptr,
-                                    int source_stride,
-                                    const uint8_t *ref_ptr,
-                                    int recon_stride,
-                                    unsigned int *sse) {
-  unsigned int var;
-  int avg;
-
-  high_variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 4,
-                &var, &avg);
-  *sse = var;
-  return (var - (((unsigned int)avg * avg) >> 5));
-}
-
-unsigned int vp9_high_variance4x8_c(const uint8_t *src_ptr,
-                                    int source_stride,
-                                    const uint8_t *ref_ptr,
-                                    int recon_stride,
-                                    unsigned int *sse) {
-  unsigned int var;
-  int avg;
-
-  high_variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 8,
-                &var, &avg);
-  *sse = var;
-  return (var - (((unsigned int)avg * avg) >> 5));
-}
-
-unsigned int vp9_high_variance4x4_c(const uint8_t *src_ptr,
-                                    int source_stride,
-                                    const uint8_t *ref_ptr,
-                                    int recon_stride,
-                                    unsigned int *sse) {
-  unsigned int var;
-  int avg;
-
-  high_variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4,
-                &var, &avg);
-  *sse = var;
-  return (var - (((unsigned int)avg * avg) >> 4));
-}
-
-
-unsigned int vp9_high_mse16x16_c(const uint8_t *src_ptr,
-                                 int source_stride,
-                                 const uint8_t *ref_ptr,
-                                 int recon_stride,
-                                 unsigned int *sse) {
-  unsigned int var;
-  int avg;
-
-  high_variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16,
-                &var, &avg);
-  *sse = var;
-  return var;
-}
-
-unsigned int vp9_high_mse16x8_c(const uint8_t *src_ptr,
-                                int source_stride,
-                                const uint8_t *ref_ptr,
-                                int recon_stride,
-                                unsigned int *sse) {
-  unsigned int var;
-  int avg;
-
-  high_variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8,
-                &var, &avg);
-  *sse = var;
-  return var;
-}
-
-unsigned int vp9_high_mse8x16_c(const uint8_t *src_ptr,
-                                int source_stride,
-                                const uint8_t *ref_ptr,
-                                int recon_stride,
-                                unsigned int *sse) {
-  unsigned int var;
-  int avg;
-
-  high_variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16,
-                &var, &avg);
-  *sse = var;
-  return var;
-}
-
-unsigned int vp9_high_mse8x8_c(const uint8_t *src_ptr,
-                               int source_stride,
-                               const uint8_t *ref_ptr,
-                               int recon_stride,
-                               unsigned int *sse) {
-  unsigned int var;
-  int avg;
-
-  high_variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8,
-                &var, &avg);
-  *sse = var;
-  return var;
-}
+HIGH_MSE(16, 16)
+HIGH_MSE(16, 8)
+HIGH_MSE(8, 16)
+HIGH_MSE(8, 8)
+HIGH_VAR(4, 4) HIGH_SUBPIX_VAR(4, 4) HIGH_SUBPIX_AVG_VAR(4, 4)
+HIGH_VAR(4, 8) HIGH_SUBPIX_VAR(4, 8) HIGH_SUBPIX_AVG_VAR(4, 8)
+HIGH_VAR(8, 4) HIGH_SUBPIX_VAR(8, 4) HIGH_SUBPIX_AVG_VAR(8, 4)
+HIGH_VAR(8, 8) HIGH_SUBPIX_VAR(8, 8) HIGH_SUBPIX_AVG_VAR(8, 8)
+HIGH_VAR(8, 16) HIGH_SUBPIX_VAR(8, 16) HIGH_SUBPIX_AVG_VAR(8, 16)
+HIGH_VAR(16, 8) HIGH_SUBPIX_VAR(16, 8) HIGH_SUBPIX_AVG_VAR(16, 8)
+HIGH_VAR(16, 16) HIGH_SUBPIX_VAR(16, 16) HIGH_SUBPIX_AVG_VAR(16, 16)
+HIGH_VAR(16, 32) HIGH_SUBPIX_VAR(16, 32) HIGH_SUBPIX_AVG_VAR(16, 32)
+HIGH_VAR(32, 16) HIGH_SUBPIX_VAR(32, 16) HIGH_SUBPIX_AVG_VAR(32, 16)
+HIGH_VAR(32, 32) HIGH_SUBPIX_VAR(32, 32) HIGH_SUBPIX_AVG_VAR(32, 32)
+HIGH_VAR(32, 64) HIGH_SUBPIX_VAR(32, 64) HIGH_SUBPIX_AVG_VAR(32, 64)
+HIGH_VAR(64, 32) HIGH_SUBPIX_VAR(64, 32) HIGH_SUBPIX_AVG_VAR(64, 32)
+HIGH_VAR(64, 64) HIGH_SUBPIX_VAR(64, 64) HIGH_SUBPIX_AVG_VAR(64, 64)
 
-unsigned int vp9_high_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr,
-                                                   int source_stride,
-                                                   const uint8_t *ref_ptr,
-                                                   int recon_stride,
-                                                   unsigned int *sse) {
-  return vp9_high_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 0,
-                                            ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_high_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr,
-                                                   int source_stride,
-                                                   const uint8_t *ref_ptr,
-                                                   int recon_stride,
-                                                   unsigned int *sse) {
-  return vp9_high_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 0,
-                                            ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_high_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr,
-                                                   int source_stride,
-                                                   const uint8_t *ref_ptr,
-                                                   int recon_stride,
-                                                   unsigned int *sse) {
-  return vp9_high_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 0,
-                                            ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_high_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr,
-                                                   int source_stride,
-                                                   const uint8_t *ref_ptr,
-                                                   int recon_stride,
-                                                   unsigned int *sse) {
-  return vp9_high_sub_pixel_variance16x16_c(src_ptr, source_stride, 0, 8,
-                                            ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_high_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr,
-                                                   int source_stride,
-                                                   const uint8_t *ref_ptr,
-                                                   int recon_stride,
-                                                   unsigned int *sse) {
-  return vp9_high_sub_pixel_variance32x32_c(src_ptr, source_stride, 0, 8,
-                                            ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_high_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr,
-                                                   int source_stride,
-                                                   const uint8_t *ref_ptr,
-                                                   int recon_stride,
-                                                   unsigned int *sse) {
-  return vp9_high_sub_pixel_variance64x64_c(src_ptr, source_stride, 0, 8,
-                                            ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_high_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr,
-                                                    int source_stride,
-                                                    const uint8_t *ref_ptr,
-                                                    int recon_stride,
-                                                    unsigned int *sse) {
-  return vp9_high_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 8,
-                                            ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_high_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr,
-                                                    int source_stride,
-                                                    const uint8_t *ref_ptr,
-                                                    int recon_stride,
-                                                    unsigned int *sse) {
-  return vp9_high_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 8,
-                                            ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_high_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr,
-                                                    int source_stride,
-                                                    const uint8_t *ref_ptr,
-                                                    int recon_stride,
-                                                    unsigned int *sse) {
-  return vp9_high_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 8,
-                                            ref_ptr, recon_stride, sse);
-}
-
-unsigned int vp9_high_sub_pixel_mse16x16_c(const uint8_t *src_ptr,
-                                           int src_pixels_per_line,
-                                           int xoffset,
-                                           int yoffset,
-                                           const uint8_t *dst_ptr,
-                                           int dst_pixels_per_line,
-                                           unsigned int *sse) {
-  vp9_high_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line,
-                                     xoffset, yoffset, dst_ptr,
-                                     dst_pixels_per_line, sse);
-  return *sse;
-}
-
-unsigned int vp9_high_sub_pixel_mse32x32_c(const uint8_t *src_ptr,
-                                           int src_pixels_per_line,
-                                           int xoffset,
-                                           int yoffset,
-                                           const uint8_t *dst_ptr,
-                                           int dst_pixels_per_line,
-                                           unsigned int *sse) {
-  vp9_high_sub_pixel_variance32x32_c(src_ptr, src_pixels_per_line,
-                                     xoffset, yoffset, dst_ptr,
-                                     dst_pixels_per_line, sse);
-  return *sse;
-}
-
-unsigned int vp9_high_sub_pixel_mse64x64_c(const uint8_t *src_ptr,
-                                           int src_pixels_per_line,
-                                           int xoffset,
-                                           int yoffset,
-                                           const uint8_t *dst_ptr,
-                                           int dst_pixels_per_line,
-                                           unsigned int *sse) {
-  vp9_high_sub_pixel_variance64x64_c(src_ptr, src_pixels_per_line,
-                                     xoffset, yoffset, dst_ptr,
-                                     dst_pixels_per_line, sse);
-  return *sse;
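+/* Average the high bit depth prediction and reference blocks, rounding to
+ * nearest. */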
+void vp9_high_comp_avg_pred(uint16_t *comp_pred, const uint8_t *pred8,
+                            int width, int height, const uint8_t *ref8,
+                            int ref_stride) {
+  int i, j;
+  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
+  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
+  for (i = 0; i < height; i++) {
+    for (j = 0; j < width; j++) {
+      const int tmp = pred[j] + ref[j];
+      comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
+    }
+    comp_pred += width;
+    pred += width;
+    ref += ref_stride;
+  }
 }
 #endif
diff --git a/vp9/encoder/vp9_variance.h b/vp9/encoder/vp9_variance.h
index e1ef9fcf7..df89e58f7 100644
--- a/vp9/encoder/vp9_variance.h
+++ b/vp9/encoder/vp9_variance.h
@@ -25,8 +25,18 @@ void variance(const uint8_t *a, int a_stride,
 
 #if CONFIG_VP9_HIGH
 void high_variance(const uint8_t *a8, int a_stride,
                    const uint8_t *b8, int b_stride,
-                   int w, int h,
+                   int w, int h, unsigned int *sse,
                    int *sum);
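+/* The 10/12-bit variants rescale sum and SSE to the 8-bit domain; see
+ * high_10_variance / high_12_variance in vp9_variance.c. */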
+
+void high_10_variance(const uint8_t *a8, int a_stride,
+                      const uint8_t *b8, int b_stride,
+                      int w, int h,
+                      unsigned int *sse, int *sum);
+
+void high_12_variance(const uint8_t *a8, int a_stride,
+                      const uint8_t *b8, int b_stride,
+                      int w, int h,
+                      unsigned int *sse, int *sum);
 #endif
@@ -91,23 +101,10 @@ typedef struct vp9_variance_vtable {
 void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width,
                        int height, const uint8_t *ref, int ref_stride);
 
-#if defined(CONVERT_TO_SHORTPTR) && CONFIG_VP9_HIGH
-static void high_comp_avg_pred(uint16_t *comp_pred, const uint8_t *pred8,
-                               int width, int height, const uint8_t *ref8,
-                               int ref_stride) {
-  int i, j;
-  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
-  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
-  for (i = 0; i < height; i++) {
-    for (j = 0; j < width; j++) {
-      const int tmp = pred[j] + ref[j];
-      comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
-    }
-    comp_pred += width;
-    pred += width;
-    ref += ref_stride;
-  }
-}
+#if CONFIG_VP9_HIGH
+void vp9_high_comp_avg_pred(uint16_t *comp_pred, const uint8_t *pred,
+                            int width, int height, const uint8_t *ref,
+                            int ref_stride);
 #endif
 
 #ifdef __cplusplus