Modify calling of NEON code in sub-pixel search
In vp8_find_best_sub_pixel_step_iteratively(), xoffset and yoffset frequently take the specific values (4,0), (0,4) and (4,4). Modified the code to call the simplified NEON versions at these specific offsets to help with performance. Change-Id: Iaf896a0f7aae4697bd36a49e182525dd1ef1ab4d
This commit is contained in:
parent
edcf74c6ad
commit
ce6c954d2e
@ -18,6 +18,37 @@ extern void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc, YV12
|
||||
extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
|
||||
extern void vpxyv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
|
||||
|
||||
/*
 * General 16x16 sub-pixel variance routine, defined outside this file.
 * It takes the source pointer and stride, the x/y sub-pixel offsets, the
 * destination pointer and stride, and a pointer through which the caller
 * passes sse.
 */
extern unsigned int vp8_sub_pixel_variance16x16_neon_func(
    const unsigned char *src_ptr,
    int src_pixels_per_line,
    int xoffset,
    int yoffset,
    const unsigned char *dst_ptr,
    int dst_pixels_per_line,
    unsigned int *sse);
|
||||
/*
 * Dispatch wrapper for the 16x16 sub-pixel variance.
 *
 * Three offset pairs are routed to dedicated half-pixel routines:
 *   (xoffset, yoffset) == (4, 0) -> vp8_variance_halfpixvar16x16_h_neon
 *   (xoffset, yoffset) == (0, 4) -> vp8_variance_halfpixvar16x16_v_neon
 *   (xoffset, yoffset) == (4, 4) -> vp8_variance_halfpixvar16x16_hv_neon
 * Every other pair is forwarded, offsets included, to the general routine
 * vp8_sub_pixel_variance16x16_neon_func.
 *
 * The value returned by the selected routine is returned unchanged; sse is
 * passed straight through to that routine.
 */
unsigned int vp8_sub_pixel_variance16x16_neon(const unsigned char *src_ptr,
                                              int src_pixels_per_line,
                                              int xoffset,
                                              int yoffset,
                                              const unsigned char *dst_ptr,
                                              int dst_pixels_per_line,
                                              unsigned int *sse)
{
    if (xoffset == 4) {
        if (yoffset == 0) {
            /* Horizontal-only special case. */
            return vp8_variance_halfpixvar16x16_h_neon(src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse);
        }
        if (yoffset == 4) {
            /* Combined horizontal + vertical special case. */
            return vp8_variance_halfpixvar16x16_hv_neon(src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse);
        }
    } else if (xoffset == 0 && yoffset == 4) {
        /* Vertical-only special case. */
        return vp8_variance_halfpixvar16x16_v_neon(src_ptr, src_pixels_per_line, dst_ptr, dst_pixels_per_line, sse);
    }

    /* No specialised routine applies: use the general implementation. */
    return vp8_sub_pixel_variance16x16_neon_func(src_ptr, src_pixels_per_line, xoffset, yoffset, dst_ptr, dst_pixels_per_line, sse);
}
|
||||
|
||||
void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
|
||||
{
|
||||
#if CONFIG_RUNTIME_CPU_DETECT
|
||||
|
@ -9,7 +9,7 @@
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp8_sub_pixel_variance16x16_neon|
|
||||
EXPORT |vp8_sub_pixel_variance16x16_neon_func|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
@ -24,7 +24,7 @@
|
||||
; stack(r6) unsigned int *sse
|
||||
;note: most of the code is copied from bilinear_predict16x16_neon and vp8_variance16x16_neon.
|
||||
|
||||
|vp8_sub_pixel_variance16x16_neon| PROC
|
||||
|vp8_sub_pixel_variance16x16_neon_func| PROC
|
||||
push {r4-r6, lr}
|
||||
|
||||
ldr r12, _BilinearTaps_coeff_
|
||||
|
Loading…
x
Reference in New Issue
Block a user