# Prints the C preamble for the generated header: a banner comment, the
# integer/common/enum includes, and forward declarations of the structs and
# union named below (several of them appear in prototypes later in this file).
#
# Fixes relative to the previous text:
#   - the here-doc introducer must be written `<<EOF` (no whitespace before
#     the terminator name);
#   - stray version-control timestamp lines inside the here-doc would have
#     been printed into the generated header and are removed;
#   - the banner comment delimiters are restored to `/*` and `*/`.
sub vp9_common_forward_decls() {
print <<EOF
/*
* VP9
*/
#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_enums.h"
struct macroblockd ;
/* Encoder forward decls */
struct macroblock ;
struct vp9_variance_vtable ;
struct search_site_config ;
struct mv ;
union int_mv ;
struct yv12_buffer_config ;
EOF
}
# Hand the preamble printer defined above to forward_decls (defined outside
# this file).
forward_decls qw/vp9_common_forward_decls/;

# x86inc.asm had specific constraints. break it out so it's easy to disable.
# zero all the variables to avoid tricky else conditions.
#
# Each variable holds either '' or an ISA name; later specialize calls
# interpolate them, e.g. "$sse2_x86inc".
$mmx_x86inc = $sse_x86inc = $sse2_x86inc = $ssse3_x86inc = $avx_x86inc =
  $avx2_x86inc = '';
$mmx_x86_64_x86inc = $sse_x86_64_x86inc = $sse2_x86_64_x86inc =
  $ssse3_x86_64_x86inc = $avx_x86_64_x86inc = $avx2_x86_64_x86inc = '';

if (vpx_config("CONFIG_USE_X86INC") eq "yes") {
  $mmx_x86inc   = 'mmx';
  $sse_x86inc   = 'sse';
  $sse2_x86inc  = 'sse2';
  $ssse3_x86inc = 'ssse3';
  $avx_x86inc   = 'avx';
  $avx2_x86inc  = 'avx2';

  # The *_x86_64_x86inc flavours need both x86inc and a 64-bit target.
  if ($opts{arch} eq "x86_64") {
    $mmx_x86_64_x86inc   = 'mmx';
    $sse_x86_64_x86inc   = 'sse';
    $sse2_x86_64_x86inc  = 'sse2';
    $ssse3_x86_64_x86inc = 'ssse3';
    $avx_x86_64_x86inc   = 'avx';
    $avx2_x86_64_x86inc  = 'avx2';
  }
}
# functions that are 64 bit only.
# Each variable stays '' unless the target architecture is x86_64, in which
# case it holds the matching ISA name.
$mmx_x86_64 = $sse2_x86_64 = $ssse3_x86_64 = $avx_x86_64 = $avx2_x86_64 = '';

if ($opts{arch} eq "x86_64") {
  $mmx_x86_64   = 'mmx';
  $sse2_x86_64  = 'sse2';
  $ssse3_x86_64 = 'ssse3';
  $avx_x86_64   = 'avx';
  $avx2_x86_64  = 'avx2';
}
#
# post proc
#
# Post-processing prototypes, registered only when CONFIG_VP9_POSTPROC is
# enabled.
#
# For four of the functions the variable named <function>_sse2 is overwritten
# right after specialize with a differently suffixed name (_xmm / _wmt);
# presumably this is the symbol that implements the sse2 variant -- confirm
# against the definitions of add_proto/specialize. The names are written as
# quoted strings rather than barewords so the assignments keep working under
# `use strict`; the assigned values are unchanged.
if (vpx_config("CONFIG_VP9_POSTPROC") eq "yes") {
  add_proto qw/void vp9_mbpost_proc_down/, "uint8_t *dst, int pitch, int rows, int cols, int flimit";
  specialize qw/vp9_mbpost_proc_down sse2/;
  $vp9_mbpost_proc_down_sse2 = 'vp9_mbpost_proc_down_xmm';

  add_proto qw/void vp9_mbpost_proc_across_ip/, "uint8_t *src, int pitch, int rows, int cols, int flimit";
  specialize qw/vp9_mbpost_proc_across_ip sse2/;
  $vp9_mbpost_proc_across_ip_sse2 = 'vp9_mbpost_proc_across_ip_xmm';

  add_proto qw/void vp9_post_proc_down_and_across/, "const uint8_t *src_ptr, uint8_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit";
  specialize qw/vp9_post_proc_down_and_across sse2/;
  $vp9_post_proc_down_and_across_sse2 = 'vp9_post_proc_down_and_across_xmm';

  add_proto qw/void vp9_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
  specialize qw/vp9_plane_add_noise sse2/;
  $vp9_plane_add_noise_sse2 = 'vp9_plane_add_noise_wmt';

  add_proto qw/void vp9_filter_by_weight16x16/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight";
  specialize qw/vp9_filter_by_weight16x16 sse2 msa/;

  add_proto qw/void vp9_filter_by_weight8x8/, "const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight";
  specialize qw/vp9_filter_by_weight8x8 sse2 msa/;
}
#
# dct
#
# The three inverse hybrid transform prototypes are the same in every
# configuration; only the list of optimized variants differs, so each
# prototype is declared once and the variant list is chosen up front.
#
# Note as optimized versions of these functions are added we need to add a check to ensure
# that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only.
# Force C versions if CONFIG_EMULATE_HARDWARE is 1
#
# No variants are listed when high bitdepth is enabled, nor when hardware
# emulation is requested. CONFIG_EMULATE_HARDWARE is only consulted when
# high bitdepth is off, as before.
my $iht_c_only = (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") ||
                 (vpx_config("CONFIG_EMULATE_HARDWARE") eq "yes");
my @iht_variants       = $iht_c_only ? () : qw/sse2 neon dspr2 msa/;
my @iht16x16_variants  = $iht_c_only ? () : qw/sse2 dspr2 msa/;

add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/vp9_iht4x4_16_add/, @iht_variants;

add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type";
specialize qw/vp9_iht8x8_64_add/, @iht_variants;

# The 16x16 transform has no neon entry in its variant list.
add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
specialize qw/vp9_iht16x16_256_add/, @iht16x16_variants;
# High bitdepth functions
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
  #
  # Sub Pixel Filters
  #
  # Every high bitdepth convolve function takes the same parameter list, so it
  # is spelled out once here.
  my $highbd_convolve_args = "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";

  add_proto qw/void vp9_highbd_convolve_copy/, $highbd_convolve_args;
  specialize qw/vp9_highbd_convolve_copy/;

  add_proto qw/void vp9_highbd_convolve_avg/, $highbd_convolve_args;
  specialize qw/vp9_highbd_convolve_avg/;

  # The convolve8 family passes "$sse2_x86_64", which is 'sse2' on x86_64
  # targets and '' otherwise.
  add_proto qw/void vp9_highbd_convolve8/, $highbd_convolve_args;
  specialize qw/vp9_highbd_convolve8/, "$sse2_x86_64";

  add_proto qw/void vp9_highbd_convolve8_horiz/, $highbd_convolve_args;
  specialize qw/vp9_highbd_convolve8_horiz/, "$sse2_x86_64";

  add_proto qw/void vp9_highbd_convolve8_vert/, $highbd_convolve_args;
  specialize qw/vp9_highbd_convolve8_vert/, "$sse2_x86_64";

  add_proto qw/void vp9_highbd_convolve8_avg/, $highbd_convolve_args;
  specialize qw/vp9_highbd_convolve8_avg/, "$sse2_x86_64";

  add_proto qw/void vp9_highbd_convolve8_avg_horiz/, $highbd_convolve_args;
  specialize qw/vp9_highbd_convolve8_avg_horiz/, "$sse2_x86_64";

  add_proto qw/void vp9_highbd_convolve8_avg_vert/, $highbd_convolve_args;
  specialize qw/vp9_highbd_convolve8_avg_vert/, "$sse2_x86_64";

  #
  # post proc
  #
  if (vpx_config("CONFIG_VP9_POSTPROC") eq "yes") {
    add_proto qw/void vp9_highbd_mbpost_proc_down/, "uint16_t *dst, int pitch, int rows, int cols, int flimit";
    specialize qw/vp9_highbd_mbpost_proc_down/;

    add_proto qw/void vp9_highbd_mbpost_proc_across_ip/, "uint16_t *src, int pitch, int rows, int cols, int flimit";
    specialize qw/vp9_highbd_mbpost_proc_across_ip/;

    add_proto qw/void vp9_highbd_post_proc_down_and_across/, "const uint16_t *src_ptr, uint16_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit";
    specialize qw/vp9_highbd_post_proc_down_and_across/;

    add_proto qw/void vp9_highbd_plane_add_noise/, "uint8_t *Start, char *noise, char blackclamp[16], char whiteclamp[16], char bothclamp[16], unsigned int Width, unsigned int Height, int Pitch";
    specialize qw/vp9_highbd_plane_add_noise/;
  }

  #
  # dct
  #
  # Note as optimized versions of these functions are added we need to add a check to ensure
  # that when CONFIG_EMULATE_HARDWARE is on, it defaults to the C versions only.
  add_proto qw/void vp9_highbd_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
  specialize qw/vp9_highbd_iht4x4_16_add/;

  add_proto qw/void vp9_highbd_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int dest_stride, int tx_type, int bd";
  specialize qw/vp9_highbd_iht8x8_64_add/;

  add_proto qw/void vp9_highbd_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type, int bd";
  specialize qw/vp9_highbd_iht16x16_256_add/;
}
#
# Encoder functions below this point.
#
if ( vpx_config ( "CONFIG_VP9_ENCODER" ) eq "yes" ) {
# Block average, min/max, Hadamard transform and SATD prototypes.
add_proto qw/unsigned int vp9_avg_8x8/, "const uint8_t *, int p";
specialize qw/vp9_avg_8x8 sse2 neon msa/;

add_proto qw/unsigned int vp9_avg_4x4/, "const uint8_t *, int p";
specialize qw/vp9_avg_4x4 sse2 msa/;

add_proto qw/void vp9_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
specialize qw/vp9_minmax_8x8 sse2/;

# "$ssse3_x86_64_x86inc" is 'ssse3' only when x86inc is enabled on an x86_64
# target, and '' otherwise.
add_proto qw/void vp9_hadamard_8x8/, "int16_t const *src_diff, int src_stride, int16_t *coeff";
specialize qw/vp9_hadamard_8x8 sse2/, "$ssse3_x86_64_x86inc";

add_proto qw/void vp9_hadamard_16x16/, "int16_t const *src_diff, int src_stride, int16_t *coeff";
specialize qw/vp9_hadamard_16x16 sse2/;

add_proto qw/int16_t vp9_satd/, "const int16_t *coeff, int length";
specialize qw/vp9_satd sse2/;
# Integral projection based motion estimation.
#
# Block match motion estimation using an integral projection measurement:
# the 2-D block and the nearby region are projected onto the horizontal and
# vertical 1-D vectors, respectively. Vector match, instead of block match,
# is then run over the two separate 1-D vectors to locate the motion
# compensated reference block.
#
# The process is run per 64x64 block to align the reference before choosing
# partitioning in speed 6. The extra 64x64 block match (SSE2 version) costs
# around 2% of CPU cycles at low bit-rate rtc speed 6; when strong motion
# activity exists in the sequence it substantially improves partition
# selection accuracy.
#
# NOTE(review): vp9_vector_var is presumably the 1-D vector match step of the
# scheme described above -- confirm against its implementation.
add_proto qw/void vp9_int_pro_row/, "int16_t *hbuf, uint8_t const *ref, const int ref_stride, const int height";
specialize qw/vp9_int_pro_row sse2 neon/;

add_proto qw/int16_t vp9_int_pro_col/, "uint8_t const *ref, const int width";
specialize qw/vp9_int_pro_col sse2 neon/;

add_proto qw/int vp9_vector_var/, "int16_t const *ref, int16_t const *src, const int bwl";
specialize qw/vp9_vector_var neon sse2/;
# High bitdepth counterparts of the average and min/max prototypes; no
# optimized variants are listed for them.
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
  add_proto qw/unsigned int vp9_highbd_avg_8x8/, "const uint8_t *, int p";
  specialize qw/vp9_highbd_avg_8x8/;

  add_proto qw/unsigned int vp9_highbd_avg_4x4/, "const uint8_t *, int p";
  specialize qw/vp9_highbd_avg_4x4/;

  add_proto qw/void vp9_highbd_minmax_8x8/, "const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max";
  specialize qw/vp9_highbd_minmax_8x8/;
}
# ENCODEMB INVOKE

#
# Denoiser
#
# Registered only when CONFIG_VP9_TEMPORAL_DENOISING is enabled.
if (vpx_config("CONFIG_VP9_TEMPORAL_DENOISING") eq "yes") {
  add_proto qw/int vp9_denoiser_filter/, "const uint8_t *sig, int sig_stride, const uint8_t *mc_avg, int mc_avg_stride, uint8_t *avg, int avg_stride, int increase_denoising, BLOCK_SIZE bs, int motion_magnitude";
  specialize qw/vp9_denoiser_filter sse2/;
}
# Block error, fast-path quantizer and fused fdct8x8+quantize prototypes.
# The parameter lists are identical in both configurations and are spelled
# out once; only the optimized variant lists differ between the branches.
my $block_error_args = "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
my $quantize_fp_args = "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
my $fdct8x8_quant_args = "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";

if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
  # the transform coefficients are held in 32-bit
  # values, so the assembler code for vp9_block_error can no longer be used.
  add_proto qw/int64_t vp9_block_error/, $block_error_args;
  specialize qw/vp9_block_error/;

  add_proto qw/void vp9_quantize_fp/, $quantize_fp_args;
  specialize qw/vp9_quantize_fp/;

  add_proto qw/void vp9_quantize_fp_32x32/, $quantize_fp_args;
  specialize qw/vp9_quantize_fp_32x32/;

  add_proto qw/void vp9_fdct8x8_quant/, $fdct8x8_quant_args;
  specialize qw/vp9_fdct8x8_quant/;
} else {
  add_proto qw/int64_t vp9_block_error/, $block_error_args;
  specialize qw/vp9_block_error avx2 msa/, "$sse2_x86inc";

  # vp9_block_error_fp is declared only in this (non high bitdepth) branch.
  add_proto qw/int64_t vp9_block_error_fp/, "const int16_t *coeff, const int16_t *dqcoeff, int block_size";
  specialize qw/vp9_block_error_fp neon/, "$sse2_x86inc";

  add_proto qw/void vp9_quantize_fp/, $quantize_fp_args;
  specialize qw/vp9_quantize_fp neon sse2/, "$ssse3_x86_64_x86inc";

  add_proto qw/void vp9_quantize_fp_32x32/, $quantize_fp_args;
  specialize qw/vp9_quantize_fp_32x32/, "$ssse3_x86_64_x86inc";

  add_proto qw/void vp9_fdct8x8_quant/, $fdct8x8_quant_args;
  specialize qw/vp9_fdct8x8_quant sse2 ssse3 neon/;
}
# fdct functions
#
# The prototypes are the same with and without high bitdepth. The only
# difference between the two configurations is that the msa variant is
# listed when high bitdepth is disabled, so that entry is made conditional
# and each prototype is declared once.
my @fdct_msa = (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") ? () : qw/msa/;
my $fht_args = "const int16_t *input, tran_low_t *output, int stride, int tx_type";

add_proto qw/void vp9_fht4x4/, $fht_args;
specialize qw/vp9_fht4x4 sse2/, @fdct_msa;

add_proto qw/void vp9_fht8x8/, $fht_args;
specialize qw/vp9_fht8x8 sse2/, @fdct_msa;

add_proto qw/void vp9_fht16x16/, $fht_args;
specialize qw/vp9_fht16x16 sse2/, @fdct_msa;

# "$mmx_x86inc" is 'mmx' when x86inc is enabled and '' otherwise; it stays
# last in the list, after the optional msa entry.
add_proto qw/void vp9_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp9_fwht4x4/, @fdct_msa, "$mmx_x86inc";
#
# Motion search
#
add_proto qw/int vp9_full_search_sad/, "const struct macroblock *x, const struct mv *ref_mv, int sad_per_bit, int distance, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv, struct mv *best_mv";
specialize qw/vp9_full_search_sad sse3 sse4_1/;
# The sse3 / sse4_1 variables are overwritten with differently named
# symbols (sadx3 / sadx8). Quoted strings replace the former barewords so the
# assignments keep working under `use strict`; the values are unchanged.
$vp9_full_search_sad_sse3   = 'vp9_full_search_sadx3';
$vp9_full_search_sad_sse4_1 = 'vp9_full_search_sadx8';

add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
specialize qw/vp9_diamond_search_sad/;

add_proto qw/int vp9_full_range_search/, "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
specialize qw/vp9_full_range_search/;

add_proto qw/void vp9_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
specialize qw/vp9_temporal_filter_apply sse2 msa/;
# High bitdepth encoder prototypes. Only vp9_highbd_block_error lists an
# optimized variant (sse2); the rest are registered with no variants.
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
  # Parameter lists shared by several of the prototypes below.
  my $highbd_quantize_fp_args = "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
  my $highbd_fht_args = "const int16_t *input, tran_low_t *output, int stride, int tx_type";

  # ENCODEMB INVOKE
  add_proto qw/int64_t vp9_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd";
  specialize qw/vp9_highbd_block_error sse2/;

  add_proto qw/void vp9_highbd_quantize_fp/, $highbd_quantize_fp_args;
  specialize qw/vp9_highbd_quantize_fp/;

  add_proto qw/void vp9_highbd_quantize_fp_32x32/, $highbd_quantize_fp_args;
  specialize qw/vp9_highbd_quantize_fp_32x32/;

  # fdct functions
  add_proto qw/void vp9_highbd_fht4x4/, $highbd_fht_args;
  specialize qw/vp9_highbd_fht4x4/;

  add_proto qw/void vp9_highbd_fht8x8/, $highbd_fht_args;
  specialize qw/vp9_highbd_fht8x8/;

  add_proto qw/void vp9_highbd_fht16x16/, $highbd_fht_args;
  specialize qw/vp9_highbd_fht16x16/;

  add_proto qw/void vp9_highbd_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
  specialize qw/vp9_highbd_fwht4x4/;

  add_proto qw/void vp9_highbd_temporal_filter_apply/, "uint8_t *frame1, unsigned int stride, uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
  specialize qw/vp9_highbd_temporal_filter_apply/;
}
# End vp9_high encoder functions
}
# end encoder functions
1 ;