Tidied high bitdepth variance

Refactored code to match current style on master.
Also fixed a bug where some sse results were
not being shifted for high bitdepth.
Also increased internal bitdepth for variance to
avoid saturation.
Also added rounding for variance when computing
shifted results.

Change-Id: I322bbc1b9abe82c8ef72ab97991720240ddf755c
This commit is contained in:
Peter de Rivaz
2014-06-03 12:47:17 +01:00
parent eb863b46f3
commit 2baec56312
9 changed files with 653 additions and 732 deletions

View File

@@ -1118,6 +1118,80 @@ specialize qw/vp9_high_variance4x8/;
add_proto qw/unsigned int vp9_high_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_variance4x4/;
# Variance prototypes for 10-bit high bitdepth input. Each specialize line
# lists no architecture suffixes, so only the C implementation is registered.
add_proto qw/unsigned int vp9_high_10_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_10_variance32x16/;
add_proto qw/unsigned int vp9_high_10_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_10_variance16x32/;
add_proto qw/unsigned int vp9_high_10_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_10_variance64x32/;
add_proto qw/unsigned int vp9_high_10_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_10_variance32x64/;
add_proto qw/unsigned int vp9_high_10_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_10_variance32x32/;
add_proto qw/unsigned int vp9_high_10_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_10_variance64x64/;
add_proto qw/unsigned int vp9_high_10_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_10_variance16x16/;
add_proto qw/unsigned int vp9_high_10_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_10_variance16x8/;
add_proto qw/unsigned int vp9_high_10_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_10_variance8x16/;
add_proto qw/unsigned int vp9_high_10_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_10_variance8x8/;
add_proto qw/unsigned int vp9_high_10_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_10_variance8x4/;
add_proto qw/unsigned int vp9_high_10_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_10_variance4x8/;
add_proto qw/unsigned int vp9_high_10_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_10_variance4x4/;
# Variance prototypes for 12-bit high bitdepth input (same block sizes as above).
add_proto qw/unsigned int vp9_high_12_variance32x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_12_variance32x16/;
add_proto qw/unsigned int vp9_high_12_variance16x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_12_variance16x32/;
add_proto qw/unsigned int vp9_high_12_variance64x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_12_variance64x32/;
add_proto qw/unsigned int vp9_high_12_variance32x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_12_variance32x64/;
add_proto qw/unsigned int vp9_high_12_variance32x32/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_12_variance32x32/;
add_proto qw/unsigned int vp9_high_12_variance64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_12_variance64x64/;
add_proto qw/unsigned int vp9_high_12_variance16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_12_variance16x16/;
add_proto qw/unsigned int vp9_high_12_variance16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_12_variance16x8/;
add_proto qw/unsigned int vp9_high_12_variance8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_12_variance8x16/;
add_proto qw/unsigned int vp9_high_12_variance8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_12_variance8x8/;
add_proto qw/unsigned int vp9_high_12_variance8x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_12_variance8x4/;
add_proto qw/unsigned int vp9_high_12_variance4x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_12_variance4x8/;
add_proto qw/unsigned int vp9_high_12_variance4x4/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_12_variance4x4/;
# Sub-pixel variance prototypes add xoffset/yoffset fractional-pel parameters.
add_proto qw/unsigned int vp9_high_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_sub_pixel_variance64x64/;
@@ -1197,6 +1271,162 @@ specialize qw/vp9_high_sub_pixel_variance4x4/;
add_proto qw/unsigned int vp9_high_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_high_sub_pixel_avg_variance4x4/;
# 10-bit sub-pixel variance and sub-pixel averaging variance prototypes.
# The _avg_ variants take an extra 'second_pred' buffer for compound prediction.
add_proto qw/unsigned int vp9_high_10_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_10_sub_pixel_variance64x64/;
add_proto qw/unsigned int vp9_high_10_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_high_10_sub_pixel_avg_variance64x64/;
add_proto qw/unsigned int vp9_high_10_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_10_sub_pixel_variance32x64/;
add_proto qw/unsigned int vp9_high_10_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_high_10_sub_pixel_avg_variance32x64/;
add_proto qw/unsigned int vp9_high_10_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_10_sub_pixel_variance64x32/;
add_proto qw/unsigned int vp9_high_10_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_high_10_sub_pixel_avg_variance64x32/;
add_proto qw/unsigned int vp9_high_10_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_10_sub_pixel_variance32x16/;
add_proto qw/unsigned int vp9_high_10_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_high_10_sub_pixel_avg_variance32x16/;
add_proto qw/unsigned int vp9_high_10_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_10_sub_pixel_variance16x32/;
add_proto qw/unsigned int vp9_high_10_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_high_10_sub_pixel_avg_variance16x32/;
add_proto qw/unsigned int vp9_high_10_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_10_sub_pixel_variance32x32/;
add_proto qw/unsigned int vp9_high_10_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_high_10_sub_pixel_avg_variance32x32/;
add_proto qw/unsigned int vp9_high_10_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_10_sub_pixel_variance16x16/;
add_proto qw/unsigned int vp9_high_10_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_high_10_sub_pixel_avg_variance16x16/;
add_proto qw/unsigned int vp9_high_10_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_10_sub_pixel_variance8x16/;
add_proto qw/unsigned int vp9_high_10_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_high_10_sub_pixel_avg_variance8x16/;
add_proto qw/unsigned int vp9_high_10_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_10_sub_pixel_variance16x8/;
add_proto qw/unsigned int vp9_high_10_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_high_10_sub_pixel_avg_variance16x8/;
add_proto qw/unsigned int vp9_high_10_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_10_sub_pixel_variance8x8/;
add_proto qw/unsigned int vp9_high_10_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_high_10_sub_pixel_avg_variance8x8/;
add_proto qw/unsigned int vp9_high_10_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_10_sub_pixel_variance8x4/;
add_proto qw/unsigned int vp9_high_10_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_high_10_sub_pixel_avg_variance8x4/;
add_proto qw/unsigned int vp9_high_10_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_10_sub_pixel_variance4x8/;
add_proto qw/unsigned int vp9_high_10_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_high_10_sub_pixel_avg_variance4x8/;
add_proto qw/unsigned int vp9_high_10_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_10_sub_pixel_variance4x4/;
add_proto qw/unsigned int vp9_high_10_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_high_10_sub_pixel_avg_variance4x4/;
# 12-bit sub-pixel variance and sub-pixel averaging variance prototypes
# (same block sizes and signatures as the 10-bit set above).
add_proto qw/unsigned int vp9_high_12_sub_pixel_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_12_sub_pixel_variance64x64/;
add_proto qw/unsigned int vp9_high_12_sub_pixel_avg_variance64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_high_12_sub_pixel_avg_variance64x64/;
add_proto qw/unsigned int vp9_high_12_sub_pixel_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_12_sub_pixel_variance32x64/;
add_proto qw/unsigned int vp9_high_12_sub_pixel_avg_variance32x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_high_12_sub_pixel_avg_variance32x64/;
add_proto qw/unsigned int vp9_high_12_sub_pixel_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_12_sub_pixel_variance64x32/;
add_proto qw/unsigned int vp9_high_12_sub_pixel_avg_variance64x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_high_12_sub_pixel_avg_variance64x32/;
add_proto qw/unsigned int vp9_high_12_sub_pixel_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_12_sub_pixel_variance32x16/;
add_proto qw/unsigned int vp9_high_12_sub_pixel_avg_variance32x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_high_12_sub_pixel_avg_variance32x16/;
add_proto qw/unsigned int vp9_high_12_sub_pixel_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_12_sub_pixel_variance16x32/;
add_proto qw/unsigned int vp9_high_12_sub_pixel_avg_variance16x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_high_12_sub_pixel_avg_variance16x32/;
add_proto qw/unsigned int vp9_high_12_sub_pixel_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_12_sub_pixel_variance32x32/;
add_proto qw/unsigned int vp9_high_12_sub_pixel_avg_variance32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_high_12_sub_pixel_avg_variance32x32/;
add_proto qw/unsigned int vp9_high_12_sub_pixel_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_12_sub_pixel_variance16x16/;
add_proto qw/unsigned int vp9_high_12_sub_pixel_avg_variance16x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_high_12_sub_pixel_avg_variance16x16/;
add_proto qw/unsigned int vp9_high_12_sub_pixel_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_12_sub_pixel_variance8x16/;
add_proto qw/unsigned int vp9_high_12_sub_pixel_avg_variance8x16/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_high_12_sub_pixel_avg_variance8x16/;
add_proto qw/unsigned int vp9_high_12_sub_pixel_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_12_sub_pixel_variance16x8/;
add_proto qw/unsigned int vp9_high_12_sub_pixel_avg_variance16x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_high_12_sub_pixel_avg_variance16x8/;
add_proto qw/unsigned int vp9_high_12_sub_pixel_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_12_sub_pixel_variance8x8/;
add_proto qw/unsigned int vp9_high_12_sub_pixel_avg_variance8x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_high_12_sub_pixel_avg_variance8x8/;
add_proto qw/unsigned int vp9_high_12_sub_pixel_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_12_sub_pixel_variance8x4/;
add_proto qw/unsigned int vp9_high_12_sub_pixel_avg_variance8x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_high_12_sub_pixel_avg_variance8x4/;
add_proto qw/unsigned int vp9_high_12_sub_pixel_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_12_sub_pixel_variance4x8/;
add_proto qw/unsigned int vp9_high_12_sub_pixel_avg_variance4x8/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_high_12_sub_pixel_avg_variance4x8/;
add_proto qw/unsigned int vp9_high_12_sub_pixel_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_12_sub_pixel_variance4x4/;
add_proto qw/unsigned int vp9_high_12_sub_pixel_avg_variance4x4/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred";
specialize qw/vp9_high_12_sub_pixel_avg_variance4x4/;
# SAD prototypes: 'max_sad' allows early termination when the running sum exceeds it.
add_proto qw/unsigned int vp9_high_sad64x64/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int max_sad";
specialize qw/vp9_high_sad64x64/;
@@ -1275,33 +1505,6 @@ specialize qw/vp9_high_sad4x8_avg/;
add_proto qw/unsigned int vp9_high_sad4x4_avg/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, unsigned int max_sad";
specialize qw/vp9_high_sad4x4_avg/;
# Half-pixel variance prototypes: _h, _v and _hv select the half-pel offset
# direction (horizontal, vertical, both); signature matches plain variance.
add_proto qw/unsigned int vp9_high_variance_halfpixvar16x16_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_variance_halfpixvar16x16_h/;
add_proto qw/unsigned int vp9_high_variance_halfpixvar16x16_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_variance_halfpixvar16x16_v/;
add_proto qw/unsigned int vp9_high_variance_halfpixvar16x16_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_variance_halfpixvar16x16_hv/;
add_proto qw/unsigned int vp9_high_variance_halfpixvar64x64_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_variance_halfpixvar64x64_h/;
add_proto qw/unsigned int vp9_high_variance_halfpixvar64x64_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_variance_halfpixvar64x64_v/;
add_proto qw/unsigned int vp9_high_variance_halfpixvar64x64_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_variance_halfpixvar64x64_hv/;
add_proto qw/unsigned int vp9_high_variance_halfpixvar32x32_h/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_variance_halfpixvar32x32_h/;
add_proto qw/unsigned int vp9_high_variance_halfpixvar32x32_v/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_variance_halfpixvar32x32_v/;
add_proto qw/unsigned int vp9_high_variance_halfpixvar32x32_hv/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_variance_halfpixvar32x32_hv/;
# Multi-reference SAD: writes one SAD per candidate into 'sad_array'.
add_proto qw/void vp9_high_sad64x64x3/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array";
specialize qw/vp9_high_sad64x64x3/;
@@ -1402,11 +1605,29 @@ specialize qw/vp9_high_mse16x8/;
add_proto qw/unsigned int vp9_high_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
specialize qw/vp9_high_mse8x8/;
add_proto qw/unsigned int vp9_high_sub_pixel_mse64x64/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_sub_pixel_mse64x64/;
# 10-bit MSE prototypes (second stride parameter is the reconstruction stride).
add_proto qw/unsigned int vp9_high_10_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
specialize qw/vp9_high_10_mse16x16/;
add_proto qw/unsigned int vp9_high_sub_pixel_mse32x32/, "const uint8_t *src_ptr, int source_stride, int xoffset, int yoffset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
specialize qw/vp9_high_sub_pixel_mse32x32/;
add_proto qw/unsigned int vp9_high_10_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
specialize qw/vp9_high_10_mse8x16/;
add_proto qw/unsigned int vp9_high_10_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
specialize qw/vp9_high_10_mse16x8/;
add_proto qw/unsigned int vp9_high_10_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
specialize qw/vp9_high_10_mse8x8/;
# 12-bit MSE prototypes (same block sizes as the 10-bit set).
add_proto qw/unsigned int vp9_high_12_mse16x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
specialize qw/vp9_high_12_mse16x16/;
add_proto qw/unsigned int vp9_high_12_mse8x16/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
specialize qw/vp9_high_12_mse8x16/;
add_proto qw/unsigned int vp9_high_12_mse16x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
specialize qw/vp9_high_12_mse16x8/;
add_proto qw/unsigned int vp9_high_12_mse8x8/, "const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, int recon_stride, unsigned int *sse";
specialize qw/vp9_high_12_mse8x8/;
add_proto qw/unsigned int vp9_high_get_mb_ss/, "const int16_t *";
specialize qw/vp9_high_get_mb_ss/;

View File

@@ -972,159 +972,6 @@ static void fnname##_bits12(const uint8_t *src_ptr, \
sad_array[i] >>= 4; \
}
/*
 * Declares three bit-depth wrappers around a variance-style function
 * 'fnname' (src, src_stride, ref, ref_stride, *sse -> unsigned int):
 *   fnname##_bits8  - passes results through unchanged (8-bit input).
 *   fnname##_bits10 - shifts the returned value and *sse right by 4
 *                     (2 extra bits of depth squared).
 *   fnname##_bits12 - shifts the returned value and *sse right by 8
 *                     (4 extra bits of depth squared).
 * Note: plain right shifts; no rounding is applied here.
 */
#define MAKE_BFP_WRAPPER(fnname) \
static unsigned int fnname##_bits8(const uint8_t *src_ptr, \
int source_stride, \
const uint8_t *ref_ptr, \
int ref_stride, \
unsigned int *sse) { \
unsigned int val = fnname(src_ptr, source_stride, ref_ptr, ref_stride, sse); \
return val; \
} \
static unsigned int fnname##_bits10(const uint8_t *src_ptr, \
int source_stride, \
const uint8_t *ref_ptr, \
int ref_stride, \
unsigned int *sse) { \
unsigned int val = fnname(src_ptr, source_stride, ref_ptr, ref_stride, sse); \
*sse >>= 4; \
return val >> 4; \
} \
static unsigned int fnname##_bits12(const uint8_t *src_ptr, \
int source_stride, \
const uint8_t *ref_ptr, \
int ref_stride, \
unsigned int *sse) { \
unsigned int val = fnname(src_ptr, source_stride, ref_ptr, ref_stride, sse); \
*sse >>= 8; \
return val >> 8; \
}
/*
 * Same bit-depth wrapper scheme as MAKE_BFP_WRAPPER, but for sub-pixel
 * variance functions that additionally take xoffset/yoffset:
 * _bits8 is pass-through, _bits10 shifts result and *sse right by 4,
 * _bits12 shifts them right by 8 (plain shifts, no rounding).
 */
#define MAKE_BFP_SUB_WRAPPER(fnname) \
static unsigned int fnname##_bits8(const uint8_t *src_ptr, \
int source_stride, \
int xoffset, \
int yoffset, \
const uint8_t *ref_ptr, \
int ref_stride, \
unsigned int *sse) { \
unsigned int val = fnname(src_ptr, source_stride, xoffset, yoffset, ref_ptr, \
ref_stride, sse); \
return val; \
} \
static unsigned int fnname##_bits10(const uint8_t *src_ptr, \
int source_stride, \
int xoffset, \
int yoffset, \
const uint8_t *ref_ptr, \
int ref_stride, \
unsigned int *sse) { \
unsigned int val = fnname(src_ptr, source_stride, xoffset, yoffset, ref_ptr, \
ref_stride, sse); \
*sse >>= 4; \
return val >> 4; \
} \
static unsigned int fnname##_bits12(const uint8_t *src_ptr, \
int source_stride, \
int xoffset, \
int yoffset, \
const uint8_t *ref_ptr, \
int ref_stride, \
unsigned int *sse) { \
unsigned int val = fnname(src_ptr, source_stride, xoffset, yoffset, ref_ptr, \
ref_stride, sse); \
*sse >>= 8; \
return val >> 8; \
}
/*
 * Bit-depth wrappers for sub-pixel averaging variance functions, which
 * take a second predictor buffer ('second_pred') in addition to the
 * sub-pixel offsets. Shift behaviour matches the other MAKE_BFP_*
 * wrappers: _bits8 pass-through, _bits10 >> 4, _bits12 >> 8 on both the
 * return value and *sse (plain shifts, no rounding).
 */
#define MAKE_BFP_SUB_AVG_WRAPPER(fnname) \
static unsigned int fnname##_bits8(const uint8_t *src_ptr, \
int source_stride, \
int xoffset, \
int yoffset, \
const uint8_t *ref_ptr, \
int ref_stride, \
unsigned int *sse, \
const uint8_t *second_pred) { \
unsigned int val = fnname(src_ptr, source_stride, xoffset, yoffset, ref_ptr, \
ref_stride, sse, second_pred); \
return val; \
} \
static unsigned int fnname##_bits10(const uint8_t *src_ptr, \
int source_stride, \
int xoffset, \
int yoffset, \
const uint8_t *ref_ptr, \
int ref_stride, \
unsigned int *sse, \
const uint8_t *second_pred) { \
unsigned int val = fnname(src_ptr, source_stride, xoffset, yoffset, \
ref_ptr, ref_stride, sse, second_pred); \
*sse >>= 4; \
return val >> 4; \
} \
static unsigned int fnname##_bits12(const uint8_t *src_ptr, \
int source_stride, \
int xoffset, \
int yoffset, \
const uint8_t *ref_ptr, \
int ref_stride, \
unsigned int *sse, \
const uint8_t *second_pred) { \
unsigned int val = fnname(src_ptr, source_stride, xoffset, yoffset, \
ref_ptr, ref_stride, sse, second_pred); \
*sse >>= 8; \
return val >> 8; \
}
/*
 * Instantiate the _bits8/_bits10/_bits12 wrappers for every high bitdepth
 * variance, sub-pixel variance, sub-pixel average variance, half-pixel
 * variance and SAD function used by the block fractional-pel function
 * tables (HIGH_BFP) below.
 */
MAKE_BFP_SUB_WRAPPER(vp9_high_sub_pixel_variance32x16)
MAKE_BFP_SUB_AVG_WRAPPER(vp9_high_sub_pixel_avg_variance32x16)
MAKE_BFP_SUB_WRAPPER(vp9_high_sub_pixel_variance16x32)
MAKE_BFP_SUB_AVG_WRAPPER(vp9_high_sub_pixel_avg_variance16x32)
MAKE_BFP_SUB_WRAPPER(vp9_high_sub_pixel_variance64x32)
MAKE_BFP_SUB_AVG_WRAPPER(vp9_high_sub_pixel_avg_variance64x32)
MAKE_BFP_SUB_WRAPPER(vp9_high_sub_pixel_variance32x64)
MAKE_BFP_SUB_AVG_WRAPPER(vp9_high_sub_pixel_avg_variance32x64)
MAKE_BFP_SUB_WRAPPER(vp9_high_sub_pixel_variance32x32)
MAKE_BFP_SUB_AVG_WRAPPER(vp9_high_sub_pixel_avg_variance32x32)
MAKE_BFP_WRAPPER(vp9_high_variance_halfpixvar32x32_h)
MAKE_BFP_WRAPPER(vp9_high_variance_halfpixvar32x32_v)
MAKE_BFP_WRAPPER(vp9_high_variance_halfpixvar32x32_hv)
MAKE_BFP_SUB_WRAPPER(vp9_high_sub_pixel_variance64x64)
MAKE_BFP_SUB_AVG_WRAPPER(vp9_high_sub_pixel_avg_variance64x64)
MAKE_BFP_WRAPPER(vp9_high_variance_halfpixvar64x64_h)
MAKE_BFP_WRAPPER(vp9_high_variance_halfpixvar64x64_v)
MAKE_BFP_WRAPPER(vp9_high_variance_halfpixvar64x64_hv)
MAKE_BFP_SUB_WRAPPER(vp9_high_sub_pixel_variance16x16)
MAKE_BFP_SUB_AVG_WRAPPER(vp9_high_sub_pixel_avg_variance16x16)
MAKE_BFP_WRAPPER(vp9_high_variance_halfpixvar16x16_h)
MAKE_BFP_WRAPPER(vp9_high_variance_halfpixvar16x16_v)
MAKE_BFP_WRAPPER(vp9_high_variance_halfpixvar16x16_hv)
MAKE_BFP_SUB_WRAPPER(vp9_high_sub_pixel_variance16x8)
MAKE_BFP_WRAPPER(vp9_high_variance16x8)
MAKE_BFP_SUB_AVG_WRAPPER(vp9_high_sub_pixel_avg_variance16x8)
MAKE_BFP_SUB_WRAPPER(vp9_high_sub_pixel_variance8x16)
MAKE_BFP_SUB_AVG_WRAPPER(vp9_high_sub_pixel_avg_variance8x16)
MAKE_BFP_WRAPPER(vp9_high_variance8x16)
MAKE_BFP_WRAPPER(vp9_high_variance8x8)
MAKE_BFP_SUB_WRAPPER(vp9_high_sub_pixel_variance8x8)
MAKE_BFP_SUB_AVG_WRAPPER(vp9_high_sub_pixel_avg_variance8x8)
MAKE_BFP_WRAPPER(vp9_high_variance8x4)
MAKE_BFP_SUB_WRAPPER(vp9_high_sub_pixel_variance8x4)
MAKE_BFP_SUB_AVG_WRAPPER(vp9_high_sub_pixel_avg_variance8x4)
MAKE_BFP_WRAPPER(vp9_high_variance4x8)
MAKE_BFP_SUB_WRAPPER(vp9_high_sub_pixel_variance4x8)
MAKE_BFP_SUB_AVG_WRAPPER(vp9_high_sub_pixel_avg_variance4x8)
MAKE_BFP_WRAPPER(vp9_high_variance4x4)
MAKE_BFP_SUB_WRAPPER(vp9_high_sub_pixel_variance4x4)
MAKE_BFP_SUB_AVG_WRAPPER(vp9_high_sub_pixel_avg_variance4x4)
MAKE_BFP_WRAPPER(vp9_high_variance32x16)
MAKE_BFP_WRAPPER(vp9_high_variance16x32)
MAKE_BFP_WRAPPER(vp9_high_variance64x32)
MAKE_BFP_WRAPPER(vp9_high_variance32x64)
MAKE_BFP_WRAPPER(vp9_high_variance32x32)
MAKE_BFP_WRAPPER(vp9_high_variance64x64)
MAKE_BFP_WRAPPER(vp9_high_variance16x16)
MAKE_BFP_SAD_WRAPPER(vp9_high_sad32x16)
MAKE_BFP_SADAVG_WRAPPER(vp9_high_sad32x16_avg)
MAKE_BFP_SAD4D_WRAPPER(vp9_high_sad32x16x4d)
@@ -1188,107 +1035,107 @@ static void high_set_var_fns(VP9_COMP *const cpi) {
default:
HIGH_BFP(BLOCK_32X16, vp9_high_sad32x16_bits8,
vp9_high_sad32x16_avg_bits8,
vp9_high_variance32x16_bits8,
vp9_high_sub_pixel_variance32x16_bits8,
vp9_high_sub_pixel_avg_variance32x16_bits8,
vp9_high_variance32x16,
vp9_high_sub_pixel_variance32x16,
vp9_high_sub_pixel_avg_variance32x16,
NULL, NULL, vp9_high_sad32x16x4d_bits8)
HIGH_BFP(BLOCK_16X32, vp9_high_sad16x32_bits8,
vp9_high_sad16x32_avg_bits8,
vp9_high_variance16x32_bits8,
vp9_high_sub_pixel_variance16x32_bits8,
vp9_high_sub_pixel_avg_variance16x32_bits8,
vp9_high_variance16x32,
vp9_high_sub_pixel_variance16x32,
vp9_high_sub_pixel_avg_variance16x32,
NULL, NULL, vp9_high_sad16x32x4d_bits8)
HIGH_BFP(BLOCK_64X32, vp9_high_sad64x32_bits8,
vp9_high_sad64x32_avg_bits8,
vp9_high_variance64x32_bits8,
vp9_high_sub_pixel_variance64x32_bits8,
vp9_high_sub_pixel_avg_variance64x32_bits8,
vp9_high_variance64x32,
vp9_high_sub_pixel_variance64x32,
vp9_high_sub_pixel_avg_variance64x32,
NULL, NULL, vp9_high_sad64x32x4d_bits8)
HIGH_BFP(BLOCK_32X64, vp9_high_sad32x64_bits8,
vp9_high_sad32x64_avg_bits8,
vp9_high_variance32x64_bits8,
vp9_high_sub_pixel_variance32x64_bits8,
vp9_high_sub_pixel_avg_variance32x64_bits8,
vp9_high_variance32x64,
vp9_high_sub_pixel_variance32x64,
vp9_high_sub_pixel_avg_variance32x64,
NULL, NULL, vp9_high_sad32x64x4d_bits8)
HIGH_BFP(BLOCK_32X32, vp9_high_sad32x32_bits8,
vp9_high_sad32x32_avg_bits8,
vp9_high_variance32x32_bits8,
vp9_high_sub_pixel_variance32x32_bits8,
vp9_high_sub_pixel_avg_variance32x32_bits8,
vp9_high_variance32x32,
vp9_high_sub_pixel_variance32x32,
vp9_high_sub_pixel_avg_variance32x32,
vp9_high_sad32x32x3_bits8,
vp9_high_sad32x32x8_bits8,
vp9_high_sad32x32x4d_bits8)
HIGH_BFP(BLOCK_64X64, vp9_high_sad64x64_bits8,
vp9_high_sad64x64_avg_bits8,
vp9_high_variance64x64_bits8,
vp9_high_sub_pixel_variance64x64_bits8,
vp9_high_sub_pixel_avg_variance64x64_bits8,
vp9_high_variance64x64,
vp9_high_sub_pixel_variance64x64,
vp9_high_sub_pixel_avg_variance64x64,
vp9_high_sad64x64x3_bits8,
vp9_high_sad64x64x8_bits8,
vp9_high_sad64x64x4d_bits8)
HIGH_BFP(BLOCK_16X16, vp9_high_sad16x16_bits8,
vp9_high_sad16x16_avg_bits8,
vp9_high_variance16x16_bits8,
vp9_high_sub_pixel_variance16x16_bits8,
vp9_high_sub_pixel_avg_variance16x16_bits8,
vp9_high_variance16x16,
vp9_high_sub_pixel_variance16x16,
vp9_high_sub_pixel_avg_variance16x16,
vp9_high_sad16x16x3_bits8,
vp9_high_sad16x16x8_bits8,
vp9_high_sad16x16x4d_bits8)
HIGH_BFP(BLOCK_16X8, vp9_high_sad16x8_bits8,
vp9_high_sad16x8_avg_bits8,
vp9_high_variance16x8_bits8,
vp9_high_sub_pixel_variance16x8_bits8,
vp9_high_sub_pixel_avg_variance16x8_bits8,
vp9_high_variance16x8,
vp9_high_sub_pixel_variance16x8,
vp9_high_sub_pixel_avg_variance16x8,
vp9_high_sad16x8x3_bits8,
vp9_high_sad16x8x8_bits8,
vp9_high_sad16x8x4d_bits8)
HIGH_BFP(BLOCK_8X16, vp9_high_sad8x16_bits8,
vp9_high_sad8x16_avg_bits8,
vp9_high_variance8x16_bits8,
vp9_high_sub_pixel_variance8x16_bits8,
vp9_high_sub_pixel_avg_variance8x16_bits8,
vp9_high_variance8x16,
vp9_high_sub_pixel_variance8x16,
vp9_high_sub_pixel_avg_variance8x16,
vp9_high_sad8x16x3_bits8,
vp9_high_sad8x16x8_bits8,
vp9_high_sad8x16x4d_bits8)
HIGH_BFP(BLOCK_8X8, vp9_high_sad8x8_bits8,
vp9_high_sad8x8_avg_bits8,
vp9_high_variance8x8_bits8,
vp9_high_sub_pixel_variance8x8_bits8,
vp9_high_sub_pixel_avg_variance8x8_bits8,
vp9_high_variance8x8,
vp9_high_sub_pixel_variance8x8,
vp9_high_sub_pixel_avg_variance8x8,
vp9_high_sad8x8x3_bits8,
vp9_high_sad8x8x8_bits8,
vp9_high_sad8x8x4d_bits8)
HIGH_BFP(BLOCK_8X4, vp9_high_sad8x4_bits8,
vp9_high_sad8x4_avg_bits8,
vp9_high_variance8x4_bits8,
vp9_high_sub_pixel_variance8x4_bits8,
vp9_high_sub_pixel_avg_variance8x4_bits8,
vp9_high_variance8x4,
vp9_high_sub_pixel_variance8x4,
vp9_high_sub_pixel_avg_variance8x4,
NULL, vp9_high_sad8x4x8_bits8,
vp9_high_sad8x4x4d_bits8)
HIGH_BFP(BLOCK_4X8, vp9_high_sad4x8_bits8,
vp9_high_sad4x8_avg_bits8,
vp9_high_variance4x8_bits8,
vp9_high_sub_pixel_variance4x8_bits8,
vp9_high_sub_pixel_avg_variance4x8_bits8,
vp9_high_variance4x8,
vp9_high_sub_pixel_variance4x8,
vp9_high_sub_pixel_avg_variance4x8,
NULL, vp9_high_sad4x8x8_bits8,
vp9_high_sad4x8x4d_bits8)
HIGH_BFP(BLOCK_4X4, vp9_high_sad4x4_bits8,
vp9_high_sad4x4_avg_bits8,
vp9_high_variance4x4_bits8,
vp9_high_sub_pixel_variance4x4_bits8,
vp9_high_sub_pixel_avg_variance4x4_bits8,
vp9_high_variance4x4,
vp9_high_sub_pixel_variance4x4,
vp9_high_sub_pixel_avg_variance4x4,
vp9_high_sad4x4x3_bits8,
vp9_high_sad4x4x8_bits8,
vp9_high_sad4x4x4d_bits8)
@@ -1297,107 +1144,107 @@ static void high_set_var_fns(VP9_COMP *const cpi) {
case BITS_10:
HIGH_BFP(BLOCK_32X16, vp9_high_sad32x16_bits10,
vp9_high_sad32x16_avg_bits10,
vp9_high_variance32x16_bits10,
vp9_high_sub_pixel_variance32x16_bits10,
vp9_high_sub_pixel_avg_variance32x16_bits10,
vp9_high_10_variance32x16,
vp9_high_10_sub_pixel_variance32x16,
vp9_high_10_sub_pixel_avg_variance32x16,
NULL, NULL, vp9_high_sad32x16x4d_bits10)
HIGH_BFP(BLOCK_16X32, vp9_high_sad16x32_bits10,
vp9_high_sad16x32_avg_bits10,
vp9_high_variance16x32_bits10,
vp9_high_sub_pixel_variance16x32_bits10,
vp9_high_sub_pixel_avg_variance16x32_bits10,
vp9_high_10_variance16x32,
vp9_high_10_sub_pixel_variance16x32,
vp9_high_10_sub_pixel_avg_variance16x32,
NULL, NULL, vp9_high_sad16x32x4d_bits10)
HIGH_BFP(BLOCK_64X32, vp9_high_sad64x32_bits10,
vp9_high_sad64x32_avg_bits10,
vp9_high_variance64x32_bits10,
vp9_high_sub_pixel_variance64x32_bits10,
vp9_high_sub_pixel_avg_variance64x32_bits10,
vp9_high_10_variance64x32,
vp9_high_10_sub_pixel_variance64x32,
vp9_high_10_sub_pixel_avg_variance64x32,
NULL, NULL, vp9_high_sad64x32x4d_bits10)
HIGH_BFP(BLOCK_32X64, vp9_high_sad32x64_bits10,
vp9_high_sad32x64_avg_bits10,
vp9_high_variance32x64_bits10,
vp9_high_sub_pixel_variance32x64_bits10,
vp9_high_sub_pixel_avg_variance32x64_bits10,
vp9_high_10_variance32x64,
vp9_high_10_sub_pixel_variance32x64,
vp9_high_10_sub_pixel_avg_variance32x64,
NULL, NULL, vp9_high_sad32x64x4d_bits10)
HIGH_BFP(BLOCK_32X32, vp9_high_sad32x32_bits10,
vp9_high_sad32x32_avg_bits10,
vp9_high_variance32x32_bits10,
vp9_high_sub_pixel_variance32x32_bits10,
vp9_high_sub_pixel_avg_variance32x32_bits10,
vp9_high_10_variance32x32,
vp9_high_10_sub_pixel_variance32x32,
vp9_high_10_sub_pixel_avg_variance32x32,
vp9_high_sad32x32x3_bits10,
vp9_high_sad32x32x8_bits10,
vp9_high_sad32x32x4d_bits10)
HIGH_BFP(BLOCK_64X64, vp9_high_sad64x64_bits10,
vp9_high_sad64x64_avg_bits10,
vp9_high_variance64x64_bits10,
vp9_high_sub_pixel_variance64x64_bits10,
vp9_high_sub_pixel_avg_variance64x64_bits10,
vp9_high_10_variance64x64,
vp9_high_10_sub_pixel_variance64x64,
vp9_high_10_sub_pixel_avg_variance64x64,
vp9_high_sad64x64x3_bits10,
vp9_high_sad64x64x8_bits10,
vp9_high_sad64x64x4d_bits10)
HIGH_BFP(BLOCK_16X16, vp9_high_sad16x16_bits10,
vp9_high_sad16x16_avg_bits10,
vp9_high_variance16x16_bits10,
vp9_high_sub_pixel_variance16x16_bits10,
vp9_high_sub_pixel_avg_variance16x16_bits10,
vp9_high_10_variance16x16,
vp9_high_10_sub_pixel_variance16x16,
vp9_high_10_sub_pixel_avg_variance16x16,
vp9_high_sad16x16x3_bits10,
vp9_high_sad16x16x8_bits10,
vp9_high_sad16x16x4d_bits10)
HIGH_BFP(BLOCK_16X8, vp9_high_sad16x8_bits10,
vp9_high_sad16x8_avg_bits10,
vp9_high_variance16x8_bits10,
vp9_high_sub_pixel_variance16x8_bits10,
vp9_high_sub_pixel_avg_variance16x8_bits10,
vp9_high_10_variance16x8,
vp9_high_10_sub_pixel_variance16x8,
vp9_high_10_sub_pixel_avg_variance16x8,
vp9_high_sad16x8x3_bits10,
vp9_high_sad16x8x8_bits10,
vp9_high_sad16x8x4d_bits10)
HIGH_BFP(BLOCK_8X16, vp9_high_sad8x16_bits10,
vp9_high_sad8x16_avg_bits10,
vp9_high_variance8x16_bits10,
vp9_high_sub_pixel_variance8x16_bits10,
vp9_high_sub_pixel_avg_variance8x16_bits10,
vp9_high_10_variance8x16,
vp9_high_10_sub_pixel_variance8x16,
vp9_high_10_sub_pixel_avg_variance8x16,
vp9_high_sad8x16x3_bits10,
vp9_high_sad8x16x8_bits10,
vp9_high_sad8x16x4d_bits10)
HIGH_BFP(BLOCK_8X8, vp9_high_sad8x8_bits10,
vp9_high_sad8x8_avg_bits10,
vp9_high_variance8x8_bits10,
vp9_high_sub_pixel_variance8x8_bits10,
vp9_high_sub_pixel_avg_variance8x8_bits10,
vp9_high_10_variance8x8,
vp9_high_10_sub_pixel_variance8x8,
vp9_high_10_sub_pixel_avg_variance8x8,
vp9_high_sad8x8x3_bits10,
vp9_high_sad8x8x8_bits10,
vp9_high_sad8x8x4d_bits10)
HIGH_BFP(BLOCK_8X4, vp9_high_sad8x4_bits10,
vp9_high_sad8x4_avg_bits10,
vp9_high_variance8x4_bits10,
vp9_high_sub_pixel_variance8x4_bits10,
vp9_high_sub_pixel_avg_variance8x4_bits10,
vp9_high_10_variance8x4,
vp9_high_10_sub_pixel_variance8x4,
vp9_high_10_sub_pixel_avg_variance8x4,
NULL, vp9_high_sad8x4x8_bits10,
vp9_high_sad8x4x4d_bits10)
HIGH_BFP(BLOCK_4X8, vp9_high_sad4x8_bits10,
vp9_high_sad4x8_avg_bits10,
vp9_high_variance4x8_bits10,
vp9_high_sub_pixel_variance4x8_bits10,
vp9_high_sub_pixel_avg_variance4x8_bits10,
vp9_high_10_variance4x8,
vp9_high_10_sub_pixel_variance4x8,
vp9_high_10_sub_pixel_avg_variance4x8,
NULL, vp9_high_sad4x8x8_bits10,
vp9_high_sad4x8x4d_bits10)
HIGH_BFP(BLOCK_4X4, vp9_high_sad4x4_bits10,
vp9_high_sad4x4_avg_bits10,
vp9_high_variance4x4_bits10,
vp9_high_sub_pixel_variance4x4_bits10,
vp9_high_sub_pixel_avg_variance4x4_bits10,
vp9_high_10_variance4x4,
vp9_high_10_sub_pixel_variance4x4,
vp9_high_10_sub_pixel_avg_variance4x4,
vp9_high_sad4x4x3_bits10,
vp9_high_sad4x4x8_bits10,
vp9_high_sad4x4x4d_bits10)
@@ -1406,107 +1253,107 @@ static void high_set_var_fns(VP9_COMP *const cpi) {
case BITS_12:
HIGH_BFP(BLOCK_32X16, vp9_high_sad32x16_bits12,
vp9_high_sad32x16_avg_bits12,
vp9_high_variance32x16_bits12,
vp9_high_sub_pixel_variance32x16_bits12,
vp9_high_sub_pixel_avg_variance32x16_bits12,
vp9_high_12_variance32x16,
vp9_high_12_sub_pixel_variance32x16,
vp9_high_12_sub_pixel_avg_variance32x16,
NULL, NULL, vp9_high_sad32x16x4d_bits12)
HIGH_BFP(BLOCK_16X32, vp9_high_sad16x32_bits12,
vp9_high_sad16x32_avg_bits12,
vp9_high_variance16x32_bits12,
vp9_high_sub_pixel_variance16x32_bits12,
vp9_high_sub_pixel_avg_variance16x32_bits12,
vp9_high_12_variance16x32,
vp9_high_12_sub_pixel_variance16x32,
vp9_high_12_sub_pixel_avg_variance16x32,
NULL, NULL, vp9_high_sad16x32x4d_bits12)
HIGH_BFP(BLOCK_64X32, vp9_high_sad64x32_bits12,
vp9_high_sad64x32_avg_bits12,
vp9_high_variance64x32_bits12,
vp9_high_sub_pixel_variance64x32_bits12,
vp9_high_sub_pixel_avg_variance64x32_bits12,
vp9_high_12_variance64x32,
vp9_high_12_sub_pixel_variance64x32,
vp9_high_12_sub_pixel_avg_variance64x32,
NULL, NULL, vp9_high_sad64x32x4d_bits12)
HIGH_BFP(BLOCK_32X64, vp9_high_sad32x64_bits12,
vp9_high_sad32x64_avg_bits12,
vp9_high_variance32x64_bits12,
vp9_high_sub_pixel_variance32x64_bits12,
vp9_high_sub_pixel_avg_variance32x64_bits12,
vp9_high_12_variance32x64,
vp9_high_12_sub_pixel_variance32x64,
vp9_high_12_sub_pixel_avg_variance32x64,
NULL, NULL, vp9_high_sad32x64x4d_bits12)
HIGH_BFP(BLOCK_32X32, vp9_high_sad32x32_bits12,
vp9_high_sad32x32_avg_bits12,
vp9_high_variance32x32_bits12,
vp9_high_sub_pixel_variance32x32_bits12,
vp9_high_sub_pixel_avg_variance32x32_bits12,
vp9_high_12_variance32x32,
vp9_high_12_sub_pixel_variance32x32,
vp9_high_12_sub_pixel_avg_variance32x32,
vp9_high_sad32x32x3_bits12,
vp9_high_sad32x32x8_bits12,
vp9_high_sad32x32x4d_bits12)
HIGH_BFP(BLOCK_64X64, vp9_high_sad64x64_bits12,
vp9_high_sad64x64_avg_bits12,
vp9_high_variance64x64_bits12,
vp9_high_sub_pixel_variance64x64_bits12,
vp9_high_sub_pixel_avg_variance64x64_bits12,
vp9_high_12_variance64x64,
vp9_high_12_sub_pixel_variance64x64,
vp9_high_12_sub_pixel_avg_variance64x64,
vp9_high_sad64x64x3_bits12,
vp9_high_sad64x64x8_bits12,
vp9_high_sad64x64x4d_bits12)
HIGH_BFP(BLOCK_16X16, vp9_high_sad16x16_bits12,
vp9_high_sad16x16_avg_bits12,
vp9_high_variance16x16_bits12,
vp9_high_sub_pixel_variance16x16_bits12,
vp9_high_sub_pixel_avg_variance16x16_bits12,
vp9_high_12_variance16x16,
vp9_high_12_sub_pixel_variance16x16,
vp9_high_12_sub_pixel_avg_variance16x16,
vp9_high_sad16x16x3_bits12,
vp9_high_sad16x16x8_bits12,
vp9_high_sad16x16x4d_bits12)
HIGH_BFP(BLOCK_16X8, vp9_high_sad16x8_bits12,
vp9_high_sad16x8_avg_bits12,
vp9_high_variance16x8_bits12,
vp9_high_sub_pixel_variance16x8_bits12,
vp9_high_sub_pixel_avg_variance16x8_bits12,
vp9_high_12_variance16x8,
vp9_high_12_sub_pixel_variance16x8,
vp9_high_12_sub_pixel_avg_variance16x8,
vp9_high_sad16x8x3_bits12,
vp9_high_sad16x8x8_bits12,
vp9_high_sad16x8x4d_bits12)
HIGH_BFP(BLOCK_8X16, vp9_high_sad8x16_bits12,
vp9_high_sad8x16_avg_bits12,
vp9_high_variance8x16_bits12,
vp9_high_sub_pixel_variance8x16_bits12,
vp9_high_sub_pixel_avg_variance8x16_bits12,
vp9_high_12_variance8x16,
vp9_high_12_sub_pixel_variance8x16,
vp9_high_12_sub_pixel_avg_variance8x16,
vp9_high_sad8x16x3_bits12,
vp9_high_sad8x16x8_bits12,
vp9_high_sad8x16x4d_bits12)
HIGH_BFP(BLOCK_8X8, vp9_high_sad8x8_bits12,
vp9_high_sad8x8_avg_bits12,
vp9_high_variance8x8_bits12,
vp9_high_sub_pixel_variance8x8_bits12,
vp9_high_sub_pixel_avg_variance8x8_bits12,
vp9_high_12_variance8x8,
vp9_high_12_sub_pixel_variance8x8,
vp9_high_12_sub_pixel_avg_variance8x8,
vp9_high_sad8x8x3_bits12,
vp9_high_sad8x8x8_bits12,
vp9_high_sad8x8x4d_bits12)
HIGH_BFP(BLOCK_8X4, vp9_high_sad8x4_bits12,
vp9_high_sad8x4_avg_bits12,
vp9_high_variance8x4_bits12,
vp9_high_sub_pixel_variance8x4_bits12,
vp9_high_sub_pixel_avg_variance8x4_bits12,
vp9_high_12_variance8x4,
vp9_high_12_sub_pixel_variance8x4,
vp9_high_12_sub_pixel_avg_variance8x4,
NULL, vp9_high_sad8x4x8_bits12,
vp9_high_sad8x4x4d_bits12)
HIGH_BFP(BLOCK_4X8, vp9_high_sad4x8_bits12,
vp9_high_sad4x8_avg_bits12,
vp9_high_variance4x8_bits12,
vp9_high_sub_pixel_variance4x8_bits12,
vp9_high_sub_pixel_avg_variance4x8_bits12,
vp9_high_12_variance4x8,
vp9_high_12_sub_pixel_variance4x8,
vp9_high_12_sub_pixel_avg_variance4x8,
NULL, vp9_high_sad4x8x8_bits12,
vp9_high_sad4x8x4d_bits12)
HIGH_BFP(BLOCK_4X4, vp9_high_sad4x4_bits12,
vp9_high_sad4x4_avg_bits12,
vp9_high_variance4x4_bits12,
vp9_high_sub_pixel_variance4x4_bits12,
vp9_high_sub_pixel_avg_variance4x4_bits12,
vp9_high_12_variance4x4,
vp9_high_12_sub_pixel_variance4x4,
vp9_high_12_sub_pixel_avg_variance4x4,
vp9_high_sad4x4x3_bits12,
vp9_high_sad4x4x8_bits12,
vp9_high_sad4x4x4d_bits12)
@@ -2784,10 +2631,19 @@ static void encode_with_recode_loop(VP9_COMP *cpi,
rc->this_key_frame_forced &&
(rc->projected_frame_size < rc->max_frame_bandwidth)) {
int last_q = q;
int kf_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
int kf_err;
int high_err_target = cpi->ambient_err;
int low_err_target = cpi->ambient_err >> 1;
#if CONFIG_VP9_HIGH
if (cm->use_high)
kf_err = vp9_high_get_y_sse(cpi->Source, get_frame_new_buffer(cm),
cm->bit_depth);
else
kf_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
#else
kf_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
#endif
// Prevent possible divide by zero error below for perfect KF
kf_err += !kf_err;
@@ -3141,20 +2997,15 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
// fixed interval. Note the reconstruction error if it is the frame before
// the force key frame
if (cpi->rc.next_key_frame_forced && cpi->rc.frames_to_key == 1) {
cpi->ambient_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
#if CONFIG_VP9_HIGH
if (cm->use_high) {
switch (cm->bit_depth) {
default:
break;
case BITS_10:
cpi->ambient_err >>= 4;
break;
case BITS_12:
cpi->ambient_err >>= 8;
break;
}
}
if (cm->use_high)
cpi->ambient_err = vp9_high_get_y_sse(cpi->Source,
get_frame_new_buffer(cm),
cm->bit_depth);
else
cpi->ambient_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
#else
cpi->ambient_err = vp9_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
#endif
}
@@ -3945,17 +3796,37 @@ void vp9_set_svc(VP9_COMP *cpi, int use_svc) {
int vp9_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b) {
assert(a->y_crop_width == b->y_crop_width);
assert(a->y_crop_height == b->y_crop_height);
#if CONFIG_VP9_HIGH
if (a->flags & YV12_FLAG_HIGH) {
return (int) high_get_sse(a->y_buffer, a->y_stride, b->y_buffer,
b->y_stride, a->y_crop_width,
a->y_crop_height);
}
#endif
return (int)get_sse(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride,
a->y_crop_width, a->y_crop_height);
}
#if CONFIG_VP9_HIGH
int vp9_high_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b,
BIT_DEPTH bit_depth) {
unsigned int sse;
int sum;
assert(a->y_crop_width == b->y_crop_width);
assert(a->y_crop_height == b->y_crop_height);
assert((a->flags & YV12_FLAG_HIGH) != 0);
assert((b->flags & YV12_FLAG_HIGH) != 0);
switch (bit_depth) {
default:
case BITS_8:
high_variance(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride,
a->y_crop_width, a->y_crop_height, &sse, &sum);
return (int) sse;
case BITS_10:
high_10_variance(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride,
a->y_crop_width, a->y_crop_height, &sse, &sum);
return (int) sse;
case BITS_12:
high_12_variance(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride,
a->y_crop_width, a->y_crop_height, &sse, &sum);
return (int) sse;
}
assert(0);
}
#endif
int vp9_get_quantizer(VP9_COMP *cpi) {
return cpi->common.base_qindex;

View File

@@ -610,6 +610,11 @@ static INLINE int get_token_alloc(int mb_rows, int mb_cols) {
int vp9_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b);
#if CONFIG_VP9_HIGH
int vp9_high_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b,
BIT_DEPTH bit_depth);
#endif
void vp9_alloc_compressor_data(VP9_COMP *cpi);
void vp9_scale_references(VP9_COMP *cpi);

View File

@@ -405,31 +405,6 @@ static vp9_variance_fn_t get_block_variance_fn(BLOCK_SIZE bsize) {
#if CONFIG_VP9_HIGH
#define MAKE_MSE_WRAPPER(fnname) \
static unsigned int fnname##_bits10(const uint8_t *src_ptr, \
int source_stride, \
const uint8_t *ref_ptr, \
int ref_stride, \
unsigned int *sse) { \
unsigned int val = fnname(src_ptr, source_stride, ref_ptr, ref_stride, sse); \
*sse >>= 4; \
return val >> 4; \
} \
static unsigned int fnname##_bits12(const uint8_t *src_ptr, \
int source_stride, \
const uint8_t *ref_ptr, \
int ref_stride, \
unsigned int *sse) { \
unsigned int val = fnname(src_ptr, source_stride, ref_ptr, ref_stride, sse); \
*sse >>= 8; \
return val >> 8; \
}
MAKE_MSE_WRAPPER(vp9_high_mse8x8)
MAKE_MSE_WRAPPER(vp9_high_mse16x8)
MAKE_MSE_WRAPPER(vp9_high_mse8x16)
MAKE_MSE_WRAPPER(vp9_high_mse16x16)
static vp9_variance_fn_t high_get_block_variance_fn(BLOCK_SIZE bsize, int bps) {
switch (bps) {
default:
@@ -447,25 +422,25 @@ static vp9_variance_fn_t high_get_block_variance_fn(BLOCK_SIZE bsize, int bps) {
case 10:
switch (bsize) {
case BLOCK_8X8:
return vp9_high_mse8x8_bits10;
return vp9_high_10_mse8x8;
case BLOCK_16X8:
return vp9_high_mse16x8_bits10;
return vp9_high_10_mse16x8;
case BLOCK_8X16:
return vp9_high_mse8x16_bits10;
return vp9_high_10_mse8x16;
default:
return vp9_high_mse16x16_bits10;
return vp9_high_10_mse16x16;
}
break;
case 12:
switch (bsize) {
case BLOCK_8X8:
return vp9_high_mse8x8_bits12;
return vp9_high_12_mse8x8;
case BLOCK_16X8:
return vp9_high_mse16x8_bits12;
return vp9_high_12_mse16x8;
case BLOCK_8X16:
return vp9_high_mse8x16_bits12;
return vp9_high_12_mse8x16;
default:
return vp9_high_mse16x16_bits12;
return vp9_high_12_mse16x16;
}
break;
}

View File

@@ -403,7 +403,8 @@ int vp9_find_best_sub_pixel_comp_tree(const MACROBLOCK *x,
#if CONFIG_VP9_HIGH
if (xd->cur_buf->flags & YV12_FLAG_HIGH) {
high_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset, y_stride);
vp9_high_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset,
y_stride);
besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, z, src_stride, sse1);
} else {
vp9_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride);

View File

@@ -40,8 +40,15 @@ static int try_filter_frame(const YV12_BUFFER_CONFIG *sd, VP9_COMP *const cpi,
vp9_loop_filter_frame(cm->frame_to_show, cm, &cpi->mb.e_mbd, filt_level, 1,
partial_frame);
#if CONFIG_VP9_HIGH
if (cm->use_high) {
filt_err = vp9_high_get_y_sse(sd, cm->frame_to_show, cm->bit_depth);
} else {
filt_err = vp9_get_y_sse(sd, cm->frame_to_show);
}
#else
filt_err = vp9_get_y_sse(sd, cm->frame_to_show);
#endif
// Re-instate the unfiltered frame
vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);

View File

@@ -184,7 +184,7 @@ unsigned int vp9_high_sad##m##x##n##_avg_c(const uint8_t *src, int src_stride, \
const uint8_t *second_pred, \
unsigned int max_sad) { \
uint16_t comp_pred[m * n]; \
high_comp_avg_pred(comp_pred, second_pred, m, n, ref, ref_stride); \
vp9_high_comp_avg_pred(comp_pred, second_pred, m, n, ref, ref_stride); \
return high_sadb(src, src_stride, comp_pred, m, m, n); \
}

View File

@@ -259,7 +259,8 @@ void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width,
void high_variance(const uint8_t *a8, int a_stride,
const uint8_t *b8, int b_stride,
int w, int h, unsigned int *sse, int *sum) {
int w, int h, unsigned int *sse,
int *sum) {
int i, j;
uint16_t *a = CONVERT_TO_SHORTPTR(a8);
@@ -273,12 +274,59 @@ void high_variance(const uint8_t *a8, int a_stride,
*sum += diff;
*sse += diff * diff;
}
a += a_stride;
b += b_stride;
}
}
void high_10_variance(const uint8_t *a8, int a_stride,
const uint8_t *b8, int b_stride,
int w, int h, unsigned int *sse,
int *sum) {
int i, j;
uint64_t sse_long = 0;
uint64_t sum_long = 0;
uint16_t *a = CONVERT_TO_SHORTPTR(a8);
uint16_t *b = CONVERT_TO_SHORTPTR(b8);
for (i = 0; i < h; i++) {
for (j = 0; j < w; j++) {
const int diff = a[j] - b[j];
sum_long += diff;
sse_long += diff * diff;
}
a += a_stride;
b += b_stride;
}
*sum = ROUND_POWER_OF_TWO(sum_long, 2);
*sse = ROUND_POWER_OF_TWO(sse_long, 4);
}
void high_12_variance(const uint8_t *a8, int a_stride,
const uint8_t *b8, int b_stride,
int w, int h, unsigned int *sse,
int *sum) {
int i, j;
uint64_t sse_long = 0;
uint64_t sum_long = 0;
uint16_t *a = CONVERT_TO_SHORTPTR(a8);
uint16_t *b = CONVERT_TO_SHORTPTR(b8);
for (i = 0; i < h; i++) {
for (j = 0; j < w; j++) {
const int diff = a[j] - b[j];
sum_long += diff;
sse_long += diff * diff;
}
a += a_stride;
b += b_stride;
}
*sum = ROUND_POWER_OF_TWO(sum_long, 4);
*sse = ROUND_POWER_OF_TWO(sse_long, 8);
}
static void high_var_filter_block2d_bil_first_pass(
const uint8_t *src_ptr8,
uint16_t *output_ptr,
@@ -336,18 +384,27 @@ unsigned int vp9_high_get_mb_ss_c(const int16_t *src_ptr) {
return sum;
}
unsigned int vp9_high_variance64x32_c(const uint8_t *src_ptr,
int source_stride,
const uint8_t *ref_ptr,
int recon_stride,
unsigned int *sse) {
unsigned int var;
int avg;
high_variance(src_ptr, source_stride, ref_ptr, recon_stride,
64, 32, &var, &avg);
*sse = var;
return (var - (((int64_t)avg * avg) >> 11));
#define HIGH_VAR(W, H) \
unsigned int vp9_high_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
const uint8_t *b, int b_stride, \
unsigned int *sse) { \
int sum; \
high_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
return *sse - (((int64_t)sum * sum) / (W * H)); \
} \
unsigned int vp9_high_10_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
const uint8_t *b, int b_stride, \
unsigned int *sse) { \
int sum; \
high_10_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
return *sse - (((int64_t)sum * sum) / (W * H)); \
} \
unsigned int vp9_high_12_variance##W##x##H##_c(const uint8_t *a, int a_stride, \
const uint8_t *b, int b_stride, \
unsigned int *sse) { \
int sum; \
high_12_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \
return *sse - (((int64_t)sum * sum) / (W * H)); \
}
#define HIGH_SUBPIX_VAR(W, H) \
@@ -366,6 +423,38 @@ unsigned int vp9_high_sub_pixel_variance##W##x##H##_c( \
\
return vp9_high_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \
dst_stride, sse); \
} \
unsigned int vp9_high_10_sub_pixel_variance##W##x##H##_c( \
const uint8_t *src, int src_stride, \
int xoffset, int yoffset, \
const uint8_t *dst, int dst_stride, \
unsigned int *sse) { \
uint16_t fdata3[(H + 1) * W]; \
uint16_t temp2[H * W]; \
\
high_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
W, BILINEAR_FILTERS_2TAP(xoffset)); \
high_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
BILINEAR_FILTERS_2TAP(yoffset)); \
\
return vp9_high_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \
dst_stride, sse); \
} \
unsigned int vp9_high_12_sub_pixel_variance##W##x##H##_c( \
const uint8_t *src, int src_stride, \
int xoffset, int yoffset, \
const uint8_t *dst, int dst_stride, \
unsigned int *sse) { \
uint16_t fdata3[(H + 1) * W]; \
uint16_t temp2[H * W]; \
\
high_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
W, BILINEAR_FILTERS_2TAP(xoffset)); \
high_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
BILINEAR_FILTERS_2TAP(yoffset)); \
\
return vp9_high_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \
dst_stride, sse); \
}
@@ -385,394 +474,150 @@ unsigned int vp9_high_sub_pixel_avg_variance##W##x##H##_c( \
high_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
BILINEAR_FILTERS_2TAP(yoffset)); \
\
high_comp_avg_pred(temp3, second_pred, W, H, CONVERT_TO_BYTEPTR(temp2), W); \
vp9_high_comp_avg_pred(temp3, second_pred, W, H, CONVERT_TO_BYTEPTR(temp2), \
W); \
\
return vp9_high_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, dst, \
dst_stride, sse); \
} \
unsigned int vp9_high_10_sub_pixel_avg_variance##W##x##H##_c( \
const uint8_t *src, int src_stride, \
int xoffset, int yoffset, \
const uint8_t *dst, int dst_stride, \
unsigned int *sse, \
const uint8_t *second_pred) { \
uint16_t fdata3[(H + 1) * W]; \
uint16_t temp2[H * W]; \
DECLARE_ALIGNED_ARRAY(16, uint16_t, temp3, H * W); \
\
high_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
W, BILINEAR_FILTERS_2TAP(xoffset)); \
high_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
BILINEAR_FILTERS_2TAP(yoffset)); \
\
vp9_high_comp_avg_pred(temp3, second_pred, W, H, CONVERT_TO_BYTEPTR(temp2), \
W); \
\
return vp9_high_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, dst, \
dst_stride, sse); \
} \
unsigned int vp9_high_12_sub_pixel_avg_variance##W##x##H##_c( \
const uint8_t *src, int src_stride, \
int xoffset, int yoffset, \
const uint8_t *dst, int dst_stride, \
unsigned int *sse, \
const uint8_t *second_pred) { \
uint16_t fdata3[(H + 1) * W]; \
uint16_t temp2[H * W]; \
DECLARE_ALIGNED_ARRAY(16, uint16_t, temp3, H * W); \
\
high_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
W, BILINEAR_FILTERS_2TAP(xoffset)); \
high_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
BILINEAR_FILTERS_2TAP(yoffset)); \
\
vp9_high_comp_avg_pred(temp3, second_pred, W, H, CONVERT_TO_BYTEPTR(temp2), \
W); \
\
return vp9_high_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, dst, \
dst_stride, sse); \
}
unsigned int vp9_high_variance32x64_c(const uint8_t *src_ptr,
int source_stride,
const uint8_t *ref_ptr,
int recon_stride,
unsigned int *sse) {
unsigned int var;
int avg;
high_variance(src_ptr, source_stride, ref_ptr, recon_stride,
32, 64, &var, &avg);
*sse = var;
return (var - (((int64_t)avg * avg) >> 11));
#define HIGH_MSE(W, H) \
unsigned int vp9_high_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
const uint8_t *ref, int ref_stride, \
unsigned int *sse) { \
int sum; \
high_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \
return *sse; \
} \
unsigned int vp9_high_10_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
const uint8_t *ref, int ref_stride, \
unsigned int *sse) { \
int sum; \
high_10_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \
return *sse; \
} \
unsigned int vp9_high_12_mse##W##x##H##_c(const uint8_t *src, int src_stride, \
const uint8_t *ref, int ref_stride, \
unsigned int *sse) { \
int sum; \
high_12_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \
return *sse; \
}
unsigned int vp9_high_variance32x16_c(const uint8_t *src_ptr,
int source_stride,
const uint8_t *ref_ptr,
int recon_stride,
unsigned int *sse) {
unsigned int var;
int avg;
high_variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 16,
&var, &avg);
*sse = var;
return (var - (((int64_t)avg * avg) >> 9));
}
unsigned int vp9_high_variance16x32_c(const uint8_t *src_ptr,
int source_stride,
const uint8_t *ref_ptr,
int recon_stride,
unsigned int *sse) {
unsigned int var;
int avg;
high_variance(src_ptr, source_stride, ref_ptr, recon_stride,
16, 32, &var, &avg);
*sse = var;
return (var - (((int64_t)avg * avg) >> 9));
}
unsigned int vp9_high_variance64x64_c(const uint8_t *src_ptr,
int source_stride,
const uint8_t *ref_ptr,
int recon_stride,
unsigned int *sse) {
unsigned int var;
int avg;
high_variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 64,
&var, &avg);
*sse = var;
return (var - (((int64_t)avg * avg) >> 12));
}
unsigned int vp9_high_variance32x32_c(const uint8_t *src_ptr,
int source_stride,
const uint8_t *ref_ptr,
int recon_stride,
unsigned int *sse) {
unsigned int var;
int avg;
high_variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 32,
&var, &avg);
*sse = var;
return (var - (((int64_t)avg * avg) >> 10));
}
unsigned int vp9_high_variance16x16_c(const uint8_t *src_ptr,
int source_stride,
const uint8_t *ref_ptr,
int recon_stride,
unsigned int *sse) {
unsigned int var;
int avg;
high_variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16,
&var, &avg);
*sse = var;
return (var - (((unsigned int)avg * avg) >> 8));
}
unsigned int vp9_high_variance8x16_c(const uint8_t *src_ptr,
int source_stride,
const uint8_t *ref_ptr,
int recon_stride,
unsigned int *sse) {
unsigned int var;
int avg;
high_variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16,
&var, &avg);
*sse = var;
return (var - (((unsigned int)avg * avg) >> 7));
}
unsigned int vp9_high_variance16x8_c(const uint8_t *src_ptr,
int source_stride,
const uint8_t *ref_ptr,
int recon_stride,
unsigned int *sse) {
unsigned int var;
int avg;
high_variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8,
&var, &avg);
*sse = var;
return (var - (((unsigned int)avg * avg) >> 7));
}
unsigned int vp9_high_variance8x8_c(const uint8_t *src_ptr,
int source_stride,
const uint8_t *ref_ptr,
int recon_stride,
unsigned int *sse) {
unsigned int var;
int avg;
high_variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8,
&var, &avg);
*sse = var;
return (var - (((unsigned int)avg * avg) >> 6));
}
unsigned int vp9_high_variance8x4_c(const uint8_t *src_ptr,
int source_stride,
const uint8_t *ref_ptr,
int recon_stride,
unsigned int *sse) {
unsigned int var;
int avg;
high_variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 4,
&var, &avg);
*sse = var;
return (var - (((unsigned int)avg * avg) >> 5));
}
unsigned int vp9_high_variance4x8_c(const uint8_t *src_ptr,
int source_stride,
const uint8_t *ref_ptr,
int recon_stride,
unsigned int *sse) {
unsigned int var;
int avg;
high_variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 8,
&var, &avg);
*sse = var;
return (var - (((unsigned int)avg * avg) >> 5));
}
unsigned int vp9_high_variance4x4_c(const uint8_t *src_ptr,
int source_stride,
const uint8_t *ref_ptr,
int recon_stride,
unsigned int *sse) {
unsigned int var;
int avg;
high_variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4,
&var, &avg);
*sse = var;
return (var - (((unsigned int)avg * avg) >> 4));
}
unsigned int vp9_high_mse16x16_c(const uint8_t *src_ptr,
int source_stride,
const uint8_t *ref_ptr,
int recon_stride,
unsigned int *sse) {
unsigned int var;
int avg;
high_variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16,
&var, &avg);
*sse = var;
return var;
}
unsigned int vp9_high_mse16x8_c(const uint8_t *src_ptr,
int source_stride,
const uint8_t *ref_ptr,
int recon_stride,
unsigned int *sse) {
unsigned int var;
int avg;
high_variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8,
&var, &avg);
*sse = var;
return var;
}
unsigned int vp9_high_mse8x16_c(const uint8_t *src_ptr,
int source_stride,
const uint8_t *ref_ptr,
int recon_stride,
unsigned int *sse) {
unsigned int var;
int avg;
high_variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16,
&var, &avg);
*sse = var;
return var;
}
unsigned int vp9_high_mse8x8_c(const uint8_t *src_ptr,
                               int source_stride,
                               const uint8_t *ref_ptr,
                               int recon_stride,
                               unsigned int *sse) {
  // MSE of an 8x8 block: the raw sum of squared differences, with no
  // mean correction.  The block sum computed alongside it is discarded.
  int sum;
  high_variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8,
                sse, &sum);
  return *sse;
}
// Instantiate the high-bitdepth MSE, variance, and sub-pixel variance
// functions for each supported block size via the HIGH_* macros (defined
// earlier in this file).
// NOTE(review): HIGH_MSE(16,16)/(16,8)/(8,16)/(8,8) appear to generate the
// same symbols as the explicit vp9_high_mse*_c definitions above — confirm
// that only one of the two sets is actually compiled.
HIGH_MSE(16, 16)
HIGH_MSE(16, 8)
HIGH_MSE(8, 16)
HIGH_MSE(8, 8)
HIGH_VAR(4, 4)
HIGH_SUBPIX_VAR(4, 4)
HIGH_SUBPIX_AVG_VAR(4, 4)
HIGH_VAR(4, 8)
HIGH_SUBPIX_VAR(4, 8)
HIGH_SUBPIX_AVG_VAR(4, 8)
HIGH_VAR(8, 4)
HIGH_SUBPIX_VAR(8, 4)
HIGH_SUBPIX_AVG_VAR(8, 4)
HIGH_VAR(8, 8)
HIGH_SUBPIX_VAR(8, 8)
HIGH_SUBPIX_AVG_VAR(8, 8)
HIGH_VAR(8, 16)
HIGH_SUBPIX_VAR(8, 16)
HIGH_SUBPIX_AVG_VAR(8, 16)
HIGH_VAR(16, 8)
HIGH_SUBPIX_VAR(16, 8)
HIGH_SUBPIX_AVG_VAR(16, 8)
HIGH_VAR(16, 16)
HIGH_SUBPIX_VAR(16, 16)
HIGH_SUBPIX_AVG_VAR(16, 16)
HIGH_VAR(16, 32)
HIGH_SUBPIX_VAR(16, 32)
HIGH_SUBPIX_AVG_VAR(16, 32)
HIGH_VAR(32, 16)
HIGH_SUBPIX_VAR(32, 16)
HIGH_SUBPIX_AVG_VAR(32, 16)
HIGH_VAR(32, 32)
HIGH_SUBPIX_VAR(32, 32)
HIGH_SUBPIX_AVG_VAR(32, 32)
HIGH_VAR(32, 64)
HIGH_SUBPIX_VAR(32, 64)
HIGH_SUBPIX_AVG_VAR(32, 64)
HIGH_VAR(64, 32)
HIGH_SUBPIX_VAR(64, 32)
HIGH_SUBPIX_AVG_VAR(64, 32)
HIGH_VAR(64, 64)
HIGH_SUBPIX_VAR(64, 64)
HIGH_SUBPIX_AVG_VAR(64, 64)
unsigned int vp9_high_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr,
                                                   int source_stride,
                                                   const uint8_t *ref_ptr,
                                                   int recon_stride,
                                                   unsigned int *sse) {
  // Horizontal half-pel variance: xoffset 8, yoffset 0.
  const unsigned int variance = vp9_high_sub_pixel_variance16x16_c(
      src_ptr, source_stride, 8, 0, ref_ptr, recon_stride, sse);
  return variance;
}
unsigned int vp9_high_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr,
                                                   int source_stride,
                                                   const uint8_t *ref_ptr,
                                                   int recon_stride,
                                                   unsigned int *sse) {
  // Horizontal half-pel variance: xoffset 8, yoffset 0.
  const unsigned int variance = vp9_high_sub_pixel_variance32x32_c(
      src_ptr, source_stride, 8, 0, ref_ptr, recon_stride, sse);
  return variance;
}
unsigned int vp9_high_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr,
                                                   int source_stride,
                                                   const uint8_t *ref_ptr,
                                                   int recon_stride,
                                                   unsigned int *sse) {
  // Horizontal half-pel variance: xoffset 8, yoffset 0.
  const unsigned int variance = vp9_high_sub_pixel_variance64x64_c(
      src_ptr, source_stride, 8, 0, ref_ptr, recon_stride, sse);
  return variance;
}
unsigned int vp9_high_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr,
                                                   int source_stride,
                                                   const uint8_t *ref_ptr,
                                                   int recon_stride,
                                                   unsigned int *sse) {
  // Vertical half-pel variance: xoffset 0, yoffset 8.
  const unsigned int variance = vp9_high_sub_pixel_variance16x16_c(
      src_ptr, source_stride, 0, 8, ref_ptr, recon_stride, sse);
  return variance;
}
unsigned int vp9_high_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr,
                                                   int source_stride,
                                                   const uint8_t *ref_ptr,
                                                   int recon_stride,
                                                   unsigned int *sse) {
  // Vertical half-pel variance: xoffset 0, yoffset 8.
  const unsigned int variance = vp9_high_sub_pixel_variance32x32_c(
      src_ptr, source_stride, 0, 8, ref_ptr, recon_stride, sse);
  return variance;
}
unsigned int vp9_high_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr,
                                                   int source_stride,
                                                   const uint8_t *ref_ptr,
                                                   int recon_stride,
                                                   unsigned int *sse) {
  // Vertical half-pel variance: xoffset 0, yoffset 8.
  const unsigned int variance = vp9_high_sub_pixel_variance64x64_c(
      src_ptr, source_stride, 0, 8, ref_ptr, recon_stride, sse);
  return variance;
}
unsigned int vp9_high_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr,
                                                    int source_stride,
                                                    const uint8_t *ref_ptr,
                                                    int recon_stride,
                                                    unsigned int *sse) {
  // Diagonal half-pel variance: xoffset 8, yoffset 8.
  const unsigned int variance = vp9_high_sub_pixel_variance16x16_c(
      src_ptr, source_stride, 8, 8, ref_ptr, recon_stride, sse);
  return variance;
}
unsigned int vp9_high_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr,
                                                    int source_stride,
                                                    const uint8_t *ref_ptr,
                                                    int recon_stride,
                                                    unsigned int *sse) {
  // Diagonal half-pel variance: xoffset 8, yoffset 8.
  const unsigned int variance = vp9_high_sub_pixel_variance32x32_c(
      src_ptr, source_stride, 8, 8, ref_ptr, recon_stride, sse);
  return variance;
}
unsigned int vp9_high_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr,
                                                    int source_stride,
                                                    const uint8_t *ref_ptr,
                                                    int recon_stride,
                                                    unsigned int *sse) {
  // Diagonal half-pel variance: xoffset 8, yoffset 8.
  const unsigned int variance = vp9_high_sub_pixel_variance64x64_c(
      src_ptr, source_stride, 8, 8, ref_ptr, recon_stride, sse);
  return variance;
}
unsigned int vp9_high_sub_pixel_mse16x16_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset,
                                           int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  // MSE is the SSE by-product of the sub-pixel variance computation;
  // the variance return value itself is deliberately discarded.
  (void)vp9_high_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line,
                                           xoffset, yoffset, dst_ptr,
                                           dst_pixels_per_line, sse);
  return *sse;
}
unsigned int vp9_high_sub_pixel_mse32x32_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset,
                                           int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  // MSE is the SSE by-product of the sub-pixel variance computation;
  // the variance return value itself is deliberately discarded.
  (void)vp9_high_sub_pixel_variance32x32_c(src_ptr, src_pixels_per_line,
                                           xoffset, yoffset, dst_ptr,
                                           dst_pixels_per_line, sse);
  return *sse;
}
unsigned int vp9_high_sub_pixel_mse64x64_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset,
                                           int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  // MSE is the SSE by-product of the sub-pixel variance computation;
  // the variance return value itself is deliberately discarded.
  vp9_high_sub_pixel_variance64x64_c(src_ptr, src_pixels_per_line,
                                     xoffset, yoffset, dst_ptr,
                                     dst_pixels_per_line, sse);
  return *sse;
}
void vp9_high_comp_avg_pred(uint16_t *comp_pred, const uint8_t *pred8,
                            int width, int height, const uint8_t *ref8,
                            int ref_stride) {
  // Writes the rounded average of a high-bitdepth prediction block and a
  // reference block into comp_pred, row by row (comp_pred stride == width).
  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
  int row;
  for (row = 0; row < height; ++row) {
    int col;
    for (col = 0; col < width; ++col) {
      const int sum = pred[col] + ref[col];
      comp_pred[col] = ROUND_POWER_OF_TWO(sum, 1);
    }
    comp_pred += width;
    pred += width;
    ref += ref_stride;
  }
}
#endif

View File

@@ -27,6 +27,16 @@ void high_variance(const uint8_t *a8, int a_stride,
const uint8_t *b8, int b_stride,
int w, int h,
unsigned int *sse, int *sum);
void high_10_variance(const uint8_t *a8, int a_stride,
const uint8_t *b8, int b_stride,
int w, int h,
unsigned int *sse, int *sum);
void high_12_variance(const uint8_t *a8, int a_stride,
const uint8_t *b8, int b_stride,
int w, int h,
unsigned int *sse, int *sum);
#endif
@@ -91,23 +101,9 @@ typedef struct vp9_variance_vtable {
void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width,
int height, const uint8_t *ref, int ref_stride);
#if defined(CONVERT_TO_SHORTPTR) && CONFIG_VP9_HIGH
static void high_comp_avg_pred(uint16_t *comp_pred, const uint8_t *pred8,
int width, int height, const uint8_t *ref8,
int ref_stride) {
int i, j;
uint16_t *pred = CONVERT_TO_SHORTPTR(pred8);
uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
for (i = 0; i < height; i++) {
for (j = 0; j < width; j++) {
const int tmp = pred[j] + ref[j];
comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1);
}
comp_pred += width;
pred += width;
ref += ref_stride;
}
}
#if CONFIG_VP9_HIGH
void vp9_high_comp_avg_pred(uint16_t *comp_pred, const uint8_t *pred, int width,
int height, const uint8_t *ref, int ref_stride);
#endif
#ifdef __cplusplus