vpx/vp8/common/rtcd_defs.sh

# Print, one per line, the C forward declarations for the struct types that
# the prototypes in this file refer to only through pointers
# (struct blockd and struct loop_filter_info).
common_forward_decls() {
  printf '%s\n' \
    'struct blockd;' \
    'struct loop_filter_info;'
}
# Pass the name of the declaration-printing function to forward_decls, which
# is defined outside this file.
# NOTE(review): presumably the RTCD generator invokes it to emit the struct
# declarations ahead of the generated prototypes — confirm against the generator.
forward_decls common_forward_decls

#
# Dequant
#
# Entry layout used throughout this section:
#   prototype  <return type> <base name> "<C argument list>"
#   specialize <base name> <extensions that provide a specialized version>
#   <base name>_<ext>=<symbol>    explicit symbol name for that extension
# NOTE(review): prototype and specialize are defined outside this file; the
# layout description is inferred from how they are used here — confirm.
# Every "media" variant in this section is named explicitly because its
# symbol carries a _v6 suffix rather than _media.
prototype void vp8_dequantize_b "struct blockd*, short *dqc"
specialize vp8_dequantize_b mmx media neon
vp8_dequantize_b_media=vp8_dequantize_b_v6

prototype void vp8_dequant_idct_add "short *input, short *dq, unsigned char *output, int stride"
specialize vp8_dequant_idct_add mmx media neon
vp8_dequant_idct_add_media=vp8_dequant_idct_add_v6

# The y-block and uv-block variants additionally list an sse2 specialization.
prototype void vp8_dequant_idct_add_y_block "short *q, short *dq, unsigned char *dst, int stride, char *eobs"
specialize vp8_dequant_idct_add_y_block mmx sse2 media neon
vp8_dequant_idct_add_y_block_media=vp8_dequant_idct_add_y_block_v6

prototype void vp8_dequant_idct_add_uv_block "short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs"
specialize vp8_dequant_idct_add_uv_block mmx sse2 media neon
vp8_dequant_idct_add_uv_block_media=vp8_dequant_idct_add_uv_block_v6

#
# Loopfilter
#
# Each entry declares a base name with its C argument list (prototype), lists
# the extensions that have a specialized version (specialize), and then names
# the symbol explicitly wherever it differs from <base name>_<ext>.
# NOTE(review): prototype and specialize are defined outside this file; this
# description is inferred from their usage here — confirm.
#
# Normal filters: all four share one argument list (y/u/v pointers, two
# strides and a struct loop_filter_info pointer). Only the "media" symbol is
# named explicitly, using an _armv6 suffix.
prototype void vp8_loop_filter_mbv "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"
specialize vp8_loop_filter_mbv mmx sse2 media neon
vp8_loop_filter_mbv_media=vp8_loop_filter_mbv_armv6

prototype void vp8_loop_filter_bv "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"
specialize vp8_loop_filter_bv mmx sse2 media neon
vp8_loop_filter_bv_media=vp8_loop_filter_bv_armv6

prototype void vp8_loop_filter_mbh "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"
specialize vp8_loop_filter_mbh mmx sse2 media neon
vp8_loop_filter_mbh_media=vp8_loop_filter_mbh_armv6

prototype void vp8_loop_filter_bh "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"
specialize vp8_loop_filter_bh mmx sse2 media neon
vp8_loop_filter_bh_media=vp8_loop_filter_bh_armv6


# Simple filters: these take only the y pointer, its stride and a blimit
# pointer. Every variant, including the plain C one, is mapped to an
# explicitly named symbol.
# mbv -> *_simple_vertical_edge_{c,mmx,sse2,armv6}; the neon symbol is mbvs_neon.
prototype void vp8_loop_filter_simple_mbv "unsigned char *y, int ystride, const unsigned char *blimit"
specialize vp8_loop_filter_simple_mbv mmx sse2 media neon
vp8_loop_filter_simple_mbv_c=vp8_loop_filter_simple_vertical_edge_c
vp8_loop_filter_simple_mbv_mmx=vp8_loop_filter_simple_vertical_edge_mmx
vp8_loop_filter_simple_mbv_sse2=vp8_loop_filter_simple_vertical_edge_sse2
vp8_loop_filter_simple_mbv_media=vp8_loop_filter_simple_vertical_edge_armv6
vp8_loop_filter_simple_mbv_neon=vp8_loop_filter_mbvs_neon

# mbh -> *_simple_horizontal_edge_{c,mmx,sse2,armv6}; the neon symbol is mbhs_neon.
prototype void vp8_loop_filter_simple_mbh "unsigned char *y, int ystride, const unsigned char *blimit"
specialize vp8_loop_filter_simple_mbh mmx sse2 media neon
vp8_loop_filter_simple_mbh_c=vp8_loop_filter_simple_horizontal_edge_c
vp8_loop_filter_simple_mbh_mmx=vp8_loop_filter_simple_horizontal_edge_mmx
vp8_loop_filter_simple_mbh_sse2=vp8_loop_filter_simple_horizontal_edge_sse2
vp8_loop_filter_simple_mbh_media=vp8_loop_filter_simple_horizontal_edge_armv6
vp8_loop_filter_simple_mbh_neon=vp8_loop_filter_mbhs_neon

# bv -> vp8_loop_filter_bvs_<ext> for every variant (media uses _armv6).
prototype void vp8_loop_filter_simple_bv "unsigned char *y, int ystride, const unsigned char *blimit"
specialize vp8_loop_filter_simple_bv mmx sse2 media neon
vp8_loop_filter_simple_bv_c=vp8_loop_filter_bvs_c
vp8_loop_filter_simple_bv_mmx=vp8_loop_filter_bvs_mmx
vp8_loop_filter_simple_bv_sse2=vp8_loop_filter_bvs_sse2
vp8_loop_filter_simple_bv_media=vp8_loop_filter_bvs_armv6
vp8_loop_filter_simple_bv_neon=vp8_loop_filter_bvs_neon

# bh -> vp8_loop_filter_bhs_<ext> for every variant (media uses _armv6).
prototype void vp8_loop_filter_simple_bh "unsigned char *y, int ystride, const unsigned char *blimit"
specialize vp8_loop_filter_simple_bh mmx sse2 media neon
vp8_loop_filter_simple_bh_c=vp8_loop_filter_bhs_c
vp8_loop_filter_simple_bh_mmx=vp8_loop_filter_bhs_mmx
vp8_loop_filter_simple_bh_sse2=vp8_loop_filter_bhs_sse2
vp8_loop_filter_simple_bh_media=vp8_loop_filter_bhs_armv6
vp8_loop_filter_simple_bh_neon=vp8_loop_filter_bhs_neon

#
# IDCT
#
# Each entry declares a base name with its C argument list (prototype), lists
# the extensions that have a specialized version (specialize), and names the
# "media" symbol explicitly where it does not end in _media.
# NOTE(review): prototype and specialize are defined outside this file; this
# description is inferred from their usage here — confirm.
#idct16
# The media variant maps to the _v6_dual symbol.
prototype void vp8_short_idct4x4llm "short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride"
specialize vp8_short_idct4x4llm mmx media neon
vp8_short_idct4x4llm_media=vp8_short_idct4x4llm_v6_dual

#iwalsh1
# Only "c" is listed for this entry; there are no assembly versions yet.
prototype void vp8_short_inv_walsh4x4_1 "short *input, short *output"
specialize vp8_short_inv_walsh4x4_1 c #no asm yet

#iwalsh16
prototype void vp8_short_inv_walsh4x4 "short *input, short *output"
specialize vp8_short_inv_walsh4x4 mmx sse2 media neon
vp8_short_inv_walsh4x4_media=vp8_short_inv_walsh4x4_v6

#idct1_scalar_add
# Unlike the other entries here, "input" is passed by value (short, not short *).
prototype void vp8_dc_only_idct_add "short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride"
specialize vp8_dc_only_idct_add	mmx media neon
vp8_dc_only_idct_add_media=vp8_dc_only_idct_add_v6
New RTCD implementation This is a proof of concept RTCD implementation to replace the current system of nested includes, prototypes, INVOKE macros, etc. Currently only the decoder specific functions are implemented in the new system. Additional functions will be added in subsequent commits. Overview: RTCD "functions" are implemented as either a global function pointer or a macro (when only one eligible specialization available). Functions which have RTCD specializations are listed using a simple DSL identifying the function's base name, its prototype, and the architecture extensions that specializations are available for. Advantages over the old system: - No INVOKE macros. A call to an RTCD function looks like an ordinary function call. - No need to pass vtables around. - If there is only one eligible function to call, the function is called directly, rather than indirecting through a function pointer. - Supports the notion of "required" extensions, so in combination with the above, on x86_64 if the best function available is sse2 or lower it will be called directly, since all x86_64 platforms implement sse2. - Elides all references to functions which will never be called, which could reduce binary size. For example if sse2 is required and there are both mmx and sse2 implementations of a certain function, the code will have no link time references to the mmx code. - Significantly easier to add a new function, just one file to edit. Disadvantages: - Requires global writable data (though this is not a new requirement) - 1 new generated source file. Change-Id: Iae6edab65315f79c168485c96872641c5aa09d55 2011-08-19 20:06:00 +02:00			`common_forward_decls() {`
			`cat <<EOF`
			`struct blockd;`
RTCD: add loopfilter functions This commit continues the process of converting to the new RTCD system. Change-Id: Ic8a4047d72ff3a54ec98977dd90e70c13213db71 2012-01-13 01:15:42 +01:00			`struct loop_filter_info;`
New RTCD implementation This is a proof of concept RTCD implementation to replace the current system of nested includes, prototypes, INVOKE macros, etc. Currently only the decoder specific functions are implemented in the new system. Additional functions will be added in subsequent commits. Overview: RTCD "functions" are implemented as either a global function pointer or a macro (when only one eligible specialization available). Functions which have RTCD specializations are listed using a simple DSL identifying the function's base name, its prototype, and the architecture extensions that specializations are available for. Advantages over the old system: - No INVOKE macros. A call to an RTCD function looks like an ordinary function call. - No need to pass vtables around. - If there is only one eligible function to call, the function is called directly, rather than indirecting through a function pointer. - Supports the notion of "required" extensions, so in combination with the above, on x86_64 if the best function available is sse2 or lower it will be called directly, since all x86_64 platforms implement sse2. - Elides all references to functions which will never be called, which could reduce binary size. For example if sse2 is required and there are both mmx and sse2 implementations of a certain function, the code will have no link time references to the mmx code. - Significantly easier to add a new function, just one file to edit. Disadvantages: - Requires global writable data (though this is not a new requirement) - 1 new generated source file. Change-Id: Iae6edab65315f79c168485c96872641c5aa09d55 2011-08-19 20:06:00 +02:00			`EOF`
			`}`
			`forward_decls common_forward_decls`

RTCD: add loopfilter functions This commit continues the process of converting to the new RTCD system. Change-Id: Ic8a4047d72ff3a54ec98977dd90e70c13213db71 2012-01-13 01:15:42 +01:00			`#`
			`# Dequant`
			`#`
New RTCD implementation This is a proof of concept RTCD implementation to replace the current system of nested includes, prototypes, INVOKE macros, etc. Currently only the decoder specific functions are implemented in the new system. Additional functions will be added in subsequent commits. Overview: RTCD "functions" are implemented as either a global function pointer or a macro (when only one eligible specialization available). Functions which have RTCD specializations are listed using a simple DSL identifying the function's base name, its prototype, and the architecture extensions that specializations are available for. Advantages over the old system: - No INVOKE macros. A call to an RTCD function looks like an ordinary function call. - No need to pass vtables around. - If there is only one eligible function to call, the function is called directly, rather than indirecting through a function pointer. - Supports the notion of "required" extensions, so in combination with the above, on x86_64 if the best function available is sse2 or lower it will be called directly, since all x86_64 platforms implement sse2. - Elides all references to functions which will never be called, which could reduce binary size. For example if sse2 is required and there are both mmx and sse2 implementations of a certain function, the code will have no link time references to the mmx code. - Significantly easier to add a new function, just one file to edit. Disadvantages: - Requires global writable data (though this is not a new requirement) - 1 new generated source file. Change-Id: Iae6edab65315f79c168485c96872641c5aa09d55 2011-08-19 20:06:00 +02:00			`prototype void vp8_dequantize_b "struct blockd*, short *dqc"`
			`specialize vp8_dequantize_b mmx media neon`
			`vp8_dequantize_b_media=vp8_dequantize_b_v6`

			`prototype void vp8_dequant_idct_add "short *input, short *dq, unsigned char *output, int stride"`
			`specialize vp8_dequant_idct_add mmx media neon`
			`vp8_dequant_idct_add_media=vp8_dequant_idct_add_v6`

			`prototype void vp8_dequant_idct_add_y_block "short *q, short *dq, unsigned char *dst, int stride, char *eobs"`
			`specialize vp8_dequant_idct_add_y_block mmx sse2 media neon`
			`vp8_dequant_idct_add_y_block_media=vp8_dequant_idct_add_y_block_v6`

			`prototype void vp8_dequant_idct_add_uv_block "short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs"`
			`specialize vp8_dequant_idct_add_uv_block mmx sse2 media neon`
			`vp8_dequant_idct_add_uv_block_media=vp8_dequant_idct_add_uv_block_v6`
RTCD: add loopfilter functions This commit continues the process of converting to the new RTCD system. Change-Id: Ic8a4047d72ff3a54ec98977dd90e70c13213db71 2012-01-13 01:15:42 +01:00
			`#`
			`# Loopfilter`
			`#`
			`prototype void vp8_loop_filter_mbv "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"`
			`specialize vp8_loop_filter_mbv mmx sse2 media neon`
			`vp8_loop_filter_mbv_media=vp8_loop_filter_mbv_armv6`

			`prototype void vp8_loop_filter_bv "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"`
			`specialize vp8_loop_filter_bv mmx sse2 media neon`
			`vp8_loop_filter_bv_media=vp8_loop_filter_bv_armv6`

			`prototype void vp8_loop_filter_mbh "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"`
			`specialize vp8_loop_filter_mbh mmx sse2 media neon`
			`vp8_loop_filter_mbh_media=vp8_loop_filter_mbh_armv6`

			`prototype void vp8_loop_filter_bh "unsigned char *y, unsigned char *u, unsigned char *v, int ystride, int uv_stride, struct loop_filter_info *lfi"`
			`specialize vp8_loop_filter_bh mmx sse2 media neon`
			`vp8_loop_filter_bh_media=vp8_loop_filter_bh_armv6`


			`prototype void vp8_loop_filter_simple_mbv "unsigned char *y, int ystride, const unsigned char *blimit"`
			`specialize vp8_loop_filter_simple_mbv mmx sse2 media neon`
			`vp8_loop_filter_simple_mbv_c=vp8_loop_filter_simple_vertical_edge_c`
			`vp8_loop_filter_simple_mbv_mmx=vp8_loop_filter_simple_vertical_edge_mmx`
			`vp8_loop_filter_simple_mbv_sse2=vp8_loop_filter_simple_vertical_edge_sse2`
			`vp8_loop_filter_simple_mbv_media=vp8_loop_filter_simple_vertical_edge_armv6`
			`vp8_loop_filter_simple_mbv_neon=vp8_loop_filter_mbvs_neon`

			`prototype void vp8_loop_filter_simple_mbh "unsigned char *y, int ystride, const unsigned char *blimit"`
			`specialize vp8_loop_filter_simple_mbh mmx sse2 media neon`
			`vp8_loop_filter_simple_mbh_c=vp8_loop_filter_simple_horizontal_edge_c`
			`vp8_loop_filter_simple_mbh_mmx=vp8_loop_filter_simple_horizontal_edge_mmx`
			`vp8_loop_filter_simple_mbh_sse2=vp8_loop_filter_simple_horizontal_edge_sse2`
			`vp8_loop_filter_simple_mbh_media=vp8_loop_filter_simple_horizontal_edge_armv6`
			`vp8_loop_filter_simple_mbh_neon=vp8_loop_filter_mbhs_neon`

			`prototype void vp8_loop_filter_simple_bv "unsigned char *y, int ystride, const unsigned char *blimit"`
			`specialize vp8_loop_filter_simple_bv mmx sse2 media neon`
			`vp8_loop_filter_simple_bv_c=vp8_loop_filter_bvs_c`
			`vp8_loop_filter_simple_bv_mmx=vp8_loop_filter_bvs_mmx`
			`vp8_loop_filter_simple_bv_sse2=vp8_loop_filter_bvs_sse2`
			`vp8_loop_filter_simple_bv_media=vp8_loop_filter_bvs_armv6`
			`vp8_loop_filter_simple_bv_neon=vp8_loop_filter_bvs_neon`

			`prototype void vp8_loop_filter_simple_bh "unsigned char *y, int ystride, const unsigned char *blimit"`
			`specialize vp8_loop_filter_simple_bh mmx sse2 media neon`
			`vp8_loop_filter_simple_bh_c=vp8_loop_filter_bhs_c`
			`vp8_loop_filter_simple_bh_mmx=vp8_loop_filter_bhs_mmx`
			`vp8_loop_filter_simple_bh_sse2=vp8_loop_filter_bhs_sse2`
			`vp8_loop_filter_simple_bh_media=vp8_loop_filter_bhs_armv6`
			`vp8_loop_filter_simple_bh_neon=vp8_loop_filter_bhs_neon`
RTCD: add remaining IDCT functions This commit continues the process of converting to the new RTCD system. Change-Id: I03c4dbf30dfd3558b0e256ff9d3ff4c012aadc80 2012-01-13 01:55:44 +01:00
			`#`
			`# IDCT`
			`#`
			`#idct16`
			`prototype void vp8_short_idct4x4llm "short *input, unsigned char *pred, int pitch, unsigned char *dst, int dst_stride"`
			`specialize vp8_short_idct4x4llm mmx media neon`
			`vp8_short_idct4x4llm_media=vp8_short_idct4x4llm_v6_dual`

			`#iwalsh1`
			`prototype void vp8_short_inv_walsh4x4_1 "short *input, short *output"`
			`specialize vp8_short_inv_walsh4x4_1 c #no asm yet`

			`#iwalsh16`
			`prototype void vp8_short_inv_walsh4x4 "short *input, short *output"`
			`specialize vp8_short_inv_walsh4x4 mmx sse2 media neon`
			`vp8_short_inv_walsh4x4_media=vp8_short_inv_walsh4x4_v6`

			`#idct1_scalar_add`
			`prototype void vp8_dc_only_idct_add "short input, unsigned char *pred, int pred_stride, unsigned char *dst, int dst_stride"`
			`specialize vp8_dc_only_idct_add mmx media neon`
			`vp8_dc_only_idct_add_media=vp8_dc_only_idct_add_v6`