Merge "VPX: removed step checks from neon convolve code"
This commit is contained in:
commit
78629508f2
@ -9,23 +9,13 @@
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
uint8_t *dst, ptrdiff_t dst_stride,
|
||||
const int16_t *filter_x, int x_step_q4,
|
||||
const int16_t *filter_y, int y_step_q4,
|
||||
int w, int h);
|
||||
void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
uint8_t *dst, ptrdiff_t dst_stride,
|
||||
const int16_t *filter_x, int x_step_q4,
|
||||
const int16_t *filter_y, int y_step_q4,
|
||||
int w, int h);
|
||||
|
||||
static INLINE int32x4_t MULTIPLY_BY_Q0(
|
||||
int16x4_t dsrc0,
|
||||
int16x4_t dsrc1,
|
||||
@ -82,12 +72,7 @@ void vpx_convolve8_avg_horiz_neon(
|
||||
uint16x4x2_t d0x2u16, d1x2u16;
|
||||
uint32x4x2_t q0x2u32;
|
||||
|
||||
if (x_step_q4 != 16) {
|
||||
vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
|
||||
filter_x, x_step_q4,
|
||||
filter_y, y_step_q4, w, h);
|
||||
return;
|
||||
}
|
||||
assert(x_step_q4 == 16);
|
||||
|
||||
q0s16 = vld1q_s16(filter_x);
|
||||
|
||||
@ -271,12 +256,7 @@ void vpx_convolve8_avg_vert_neon(
|
||||
uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16;
|
||||
int32x4_t q1s32, q2s32, q14s32, q15s32;
|
||||
|
||||
if (y_step_q4 != 16) {
|
||||
vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
|
||||
filter_x, x_step_q4,
|
||||
filter_y, y_step_q4, w, h);
|
||||
return;
|
||||
}
|
||||
assert(y_step_q4 == 16);
|
||||
|
||||
src -= src_stride * 3;
|
||||
q0s16 = vld1q_s16(filter_y);
|
||||
|
@ -19,8 +19,6 @@
|
||||
|
||||
EXPORT |vpx_convolve8_avg_horiz_neon|
|
||||
EXPORT |vpx_convolve8_avg_vert_neon|
|
||||
IMPORT |vpx_convolve8_avg_horiz_c|
|
||||
IMPORT |vpx_convolve8_avg_vert_c|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
@ -52,10 +50,6 @@
|
||||
; sp[]int h
|
||||
|
||||
|vpx_convolve8_avg_horiz_neon| PROC
|
||||
ldr r12, [sp, #4] ; x_step_q4
|
||||
cmp r12, #16
|
||||
bne vpx_convolve8_avg_horiz_c
|
||||
|
||||
push {r4-r10, lr}
|
||||
|
||||
sub r0, r0, #3 ; adjust for taps
|
||||
@ -184,10 +178,6 @@ vpx_convolve8_avg_loop_horiz
|
||||
ENDP
|
||||
|
||||
|vpx_convolve8_avg_vert_neon| PROC
|
||||
ldr r12, [sp, #12]
|
||||
cmp r12, #16
|
||||
bne vpx_convolve8_avg_vert_c
|
||||
|
||||
push {r4-r8, lr}
|
||||
|
||||
; adjust for taps
|
||||
|
@ -9,23 +9,13 @@
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "./vpx_config.h"
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "vpx/vpx_integer.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
|
||||
void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
uint8_t *dst, ptrdiff_t dst_stride,
|
||||
const int16_t *filter_x, int x_step_q4,
|
||||
const int16_t *filter_y, int y_step_q4,
|
||||
int w, int h);
|
||||
void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
|
||||
uint8_t *dst, ptrdiff_t dst_stride,
|
||||
const int16_t *filter_x, int x_step_q4,
|
||||
const int16_t *filter_y, int y_step_q4,
|
||||
int w, int h);
|
||||
|
||||
static INLINE int32x4_t MULTIPLY_BY_Q0(
|
||||
int16x4_t dsrc0,
|
||||
int16x4_t dsrc1,
|
||||
@ -82,12 +72,7 @@ void vpx_convolve8_horiz_neon(
|
||||
uint16x4x2_t d0x2u16, d1x2u16;
|
||||
uint32x4x2_t q0x2u32;
|
||||
|
||||
if (x_step_q4 != 16) {
|
||||
vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride,
|
||||
filter_x, x_step_q4,
|
||||
filter_y, y_step_q4, w, h);
|
||||
return;
|
||||
}
|
||||
assert(x_step_q4 == 16);
|
||||
|
||||
q0s16 = vld1q_s16(filter_x);
|
||||
|
||||
@ -255,12 +240,7 @@ void vpx_convolve8_vert_neon(
|
||||
uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16;
|
||||
int32x4_t q1s32, q2s32, q14s32, q15s32;
|
||||
|
||||
if (y_step_q4 != 16) {
|
||||
vpx_convolve8_vert_c(src, src_stride, dst, dst_stride,
|
||||
filter_x, x_step_q4,
|
||||
filter_y, y_step_q4, w, h);
|
||||
return;
|
||||
}
|
||||
assert(y_step_q4 == 16);
|
||||
|
||||
src -= src_stride * 3;
|
||||
q0s16 = vld1q_s16(filter_y);
|
||||
|
@ -19,8 +19,6 @@
|
||||
|
||||
EXPORT |vpx_convolve8_horiz_neon|
|
||||
EXPORT |vpx_convolve8_vert_neon|
|
||||
IMPORT |vpx_convolve8_horiz_c|
|
||||
IMPORT |vpx_convolve8_vert_c|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
@ -52,10 +50,6 @@
|
||||
; sp[]int h
|
||||
|
||||
|vpx_convolve8_horiz_neon| PROC
|
||||
ldr r12, [sp, #4] ; x_step_q4
|
||||
cmp r12, #16
|
||||
bne vpx_convolve8_horiz_c
|
||||
|
||||
push {r4-r10, lr}
|
||||
|
||||
sub r0, r0, #3 ; adjust for taps
|
||||
@ -173,10 +167,6 @@ vpx_convolve8_loop_horiz
|
||||
ENDP
|
||||
|
||||
|vpx_convolve8_vert_neon| PROC
|
||||
ldr r12, [sp, #12]
|
||||
cmp r12, #16
|
||||
bne vpx_convolve8_vert_c
|
||||
|
||||
push {r4-r8, lr}
|
||||
|
||||
; adjust for taps
|
||||
|
@ -8,6 +8,8 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "./vpx_dsp_rtcd.h"
|
||||
#include "vpx_dsp/vpx_dsp_common.h"
|
||||
#include "vpx_ports/mem.h"
|
||||
@ -25,14 +27,8 @@ void vpx_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride,
|
||||
// Account for the vertical phase needing 3 lines prior and 4 lines post
|
||||
int intermediate_height = h + 7;
|
||||
|
||||
if (x_step_q4 != 16 || y_step_q4 != 16) {
|
||||
vpx_convolve8_c(src, src_stride,
|
||||
dst, dst_stride,
|
||||
filter_x, x_step_q4,
|
||||
filter_y, y_step_q4,
|
||||
w, h);
|
||||
return;
|
||||
}
|
||||
assert(y_step_q4 == 16);
|
||||
assert(x_step_q4 == 16);
|
||||
|
||||
/* Filter starting 3 lines back. The neon implementation will ignore the
|
||||
* given height and filter a multiple of 4 lines. Since this goes in to
|
||||
@ -59,14 +55,8 @@ void vpx_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride,
|
||||
DECLARE_ALIGNED(8, uint8_t, temp[64 * 72]);
|
||||
int intermediate_height = h + 7;
|
||||
|
||||
if (x_step_q4 != 16 || y_step_q4 != 16) {
|
||||
vpx_convolve8_avg_c(src, src_stride,
|
||||
dst, dst_stride,
|
||||
filter_x, x_step_q4,
|
||||
filter_y, y_step_q4,
|
||||
w, h);
|
||||
return;
|
||||
}
|
||||
assert(y_step_q4 == 16);
|
||||
assert(x_step_q4 == 16);
|
||||
|
||||
/* This implementation has the same issues as above. In addition, we only want
|
||||
* to average the values after both passes.
|
||||
|
Loading…
x
Reference in New Issue
Block a user