Merge "Use defines for inline and __builtin_prefetch"

This commit is contained in:
Johann 2014-12-16 18:04:04 -08:00 committed by Gerrit Code Review
commit ebc1951c7c
9 changed files with 43 additions and 25 deletions

View File

@ -11,6 +11,9 @@
#include <stddef.h> #include <stddef.h>
#include <arm_neon.h> #include <arm_neon.h>
#include "./vpx_config.h"
#include "vpx_ports/mem.h"
void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, void vp9_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride, uint8_t *dst, ptrdiff_t dst_stride,
const int16_t *filter_x, int x_step_q4, const int16_t *filter_x, int x_step_q4,
@ -22,7 +25,7 @@ void vp9_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
const int16_t *filter_y, int y_step_q4, const int16_t *filter_y, int y_step_q4,
int w, int h); int w, int h);
static inline int32x4_t MULTIPLY_BY_Q0( static INLINE int32x4_t MULTIPLY_BY_Q0(
int16x4_t dsrc0, int16x4_t dsrc0,
int16x4_t dsrc1, int16x4_t dsrc1,
int16x4_t dsrc2, int16x4_t dsrc2,

View File

@ -11,6 +11,9 @@
#include <stddef.h> #include <stddef.h>
#include <arm_neon.h> #include <arm_neon.h>
#include "./vpx_config.h"
#include "vpx_ports/mem.h"
void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, void vp9_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride, uint8_t *dst, ptrdiff_t dst_stride,
const int16_t *filter_x, int x_step_q4, const int16_t *filter_x, int x_step_q4,
@ -22,7 +25,7 @@ void vp9_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
const int16_t *filter_y, int y_step_q4, const int16_t *filter_y, int y_step_q4,
int w, int h); int w, int h);
static inline int32x4_t MULTIPLY_BY_Q0( static INLINE int32x4_t MULTIPLY_BY_Q0(
int16x4_t dsrc0, int16x4_t dsrc0,
int16x4_t dsrc1, int16x4_t dsrc1,
int16x4_t dsrc2, int16x4_t dsrc2,

View File

@ -10,6 +10,8 @@
#include <arm_neon.h> #include <arm_neon.h>
#include "./vpx_config.h"
static int16_t cospi_2_64 = 16305; static int16_t cospi_2_64 = 16305;
static int16_t cospi_4_64 = 16069; static int16_t cospi_4_64 = 16069;
static int16_t cospi_6_64 = 15679; static int16_t cospi_6_64 = 15679;
@ -26,7 +28,7 @@ static int16_t cospi_26_64 = 4756;
static int16_t cospi_28_64 = 3196; static int16_t cospi_28_64 = 3196;
static int16_t cospi_30_64 = 1606; static int16_t cospi_30_64 = 1606;
static inline void TRANSPOSE8X8( static INLINE void TRANSPOSE8X8(
int16x8_t *q8s16, int16x8_t *q8s16,
int16x8_t *q9s16, int16x8_t *q9s16,
int16x8_t *q10s16, int16x8_t *q10s16,

View File

@ -11,7 +11,9 @@
#include <arm_neon.h> #include <arm_neon.h>
#include "vp9/common/vp9_idct.h" #include "vp9/common/vp9_idct.h"
static inline void LD_16x8( #include "./vpx_config.h"
static INLINE void LD_16x8(
uint8_t *d, uint8_t *d,
int d_stride, int d_stride,
uint8x16_t *q8u8, uint8x16_t *q8u8,
@ -40,7 +42,7 @@ static inline void LD_16x8(
return; return;
} }
static inline void ADD_DIFF_16x8( static INLINE void ADD_DIFF_16x8(
uint8x16_t qdiffu8, uint8x16_t qdiffu8,
uint8x16_t *q8u8, uint8x16_t *q8u8,
uint8x16_t *q9u8, uint8x16_t *q9u8,
@ -61,7 +63,7 @@ static inline void ADD_DIFF_16x8(
return; return;
} }
static inline void SUB_DIFF_16x8( static INLINE void SUB_DIFF_16x8(
uint8x16_t qdiffu8, uint8x16_t qdiffu8,
uint8x16_t *q8u8, uint8x16_t *q8u8,
uint8x16_t *q9u8, uint8x16_t *q9u8,
@ -82,7 +84,7 @@ static inline void SUB_DIFF_16x8(
return; return;
} }
static inline void ST_16x8( static INLINE void ST_16x8(
uint8_t *d, uint8_t *d,
int d_stride, int d_stride,
uint8x16_t *q8u8, uint8x16_t *q8u8,

View File

@ -10,6 +10,8 @@
#include <arm_neon.h> #include <arm_neon.h>
#include "./vpx_config.h"
static int16_t cospi_1_64 = 16364; static int16_t cospi_1_64 = 16364;
static int16_t cospi_2_64 = 16305; static int16_t cospi_2_64 = 16305;
static int16_t cospi_3_64 = 16207; static int16_t cospi_3_64 = 16207;
@ -57,7 +59,7 @@ static int16_t cospi_31_64 = 804;
#define STORE_COMBINE_CENTER_RESULTS(r10, r9) \ #define STORE_COMBINE_CENTER_RESULTS(r10, r9) \
__STORE_COMBINE_CENTER_RESULTS(r10, r9, stride, \ __STORE_COMBINE_CENTER_RESULTS(r10, r9, stride, \
q6s16, q7s16, q8s16, q9s16); q6s16, q7s16, q8s16, q9s16);
static inline void __STORE_COMBINE_CENTER_RESULTS( static INLINE void __STORE_COMBINE_CENTER_RESULTS(
uint8_t *p1, uint8_t *p1,
uint8_t *p2, uint8_t *p2,
int stride, int stride,
@ -105,7 +107,7 @@ static inline void __STORE_COMBINE_CENTER_RESULTS(
#define STORE_COMBINE_EXTREME_RESULTS(r7, r6); \ #define STORE_COMBINE_EXTREME_RESULTS(r7, r6); \
__STORE_COMBINE_EXTREME_RESULTS(r7, r6, stride, \ __STORE_COMBINE_EXTREME_RESULTS(r7, r6, stride, \
q4s16, q5s16, q6s16, q7s16); q4s16, q5s16, q6s16, q7s16);
static inline void __STORE_COMBINE_EXTREME_RESULTS( static INLINE void __STORE_COMBINE_EXTREME_RESULTS(
uint8_t *p1, uint8_t *p1,
uint8_t *p2, uint8_t *p2,
int stride, int stride,
@ -152,7 +154,7 @@ static inline void __STORE_COMBINE_EXTREME_RESULTS(
#define DO_BUTTERFLY_STD(const_1, const_2, qA, qB) \ #define DO_BUTTERFLY_STD(const_1, const_2, qA, qB) \
DO_BUTTERFLY(q14s16, q13s16, const_1, const_2, qA, qB); DO_BUTTERFLY(q14s16, q13s16, const_1, const_2, qA, qB);
static inline void DO_BUTTERFLY( static INLINE void DO_BUTTERFLY(
int16x8_t q14s16, int16x8_t q14s16,
int16x8_t q13s16, int16x8_t q13s16,
int16_t first_const, int16_t first_const,
@ -194,7 +196,7 @@ static inline void DO_BUTTERFLY(
return; return;
} }
static inline void idct32_transpose_pair( static INLINE void idct32_transpose_pair(
int16_t *input, int16_t *input,
int16_t *t_buf) { int16_t *t_buf) {
int16_t *in; int16_t *in;
@ -288,7 +290,7 @@ static inline void idct32_transpose_pair(
return; return;
} }
static inline void idct32_bands_end_1st_pass( static INLINE void idct32_bands_end_1st_pass(
int16_t *out, int16_t *out,
int16x8_t q2s16, int16x8_t q2s16,
int16x8_t q3s16, int16x8_t q3s16,
@ -383,7 +385,7 @@ static inline void idct32_bands_end_1st_pass(
return; return;
} }
static inline void idct32_bands_end_2nd_pass( static INLINE void idct32_bands_end_2nd_pass(
int16_t *out, int16_t *out,
uint8_t *dest, uint8_t *dest,
int stride, int stride,

View File

@ -10,6 +10,8 @@
#include <arm_neon.h> #include <arm_neon.h>
#include "./vpx_config.h"
static int16_t cospi_4_64 = 16069; static int16_t cospi_4_64 = 16069;
static int16_t cospi_8_64 = 15137; static int16_t cospi_8_64 = 15137;
static int16_t cospi_12_64 = 13623; static int16_t cospi_12_64 = 13623;
@ -18,7 +20,7 @@ static int16_t cospi_20_64 = 9102;
static int16_t cospi_24_64 = 6270; static int16_t cospi_24_64 = 6270;
static int16_t cospi_28_64 = 3196; static int16_t cospi_28_64 = 3196;
static inline void TRANSPOSE8X8( static INLINE void TRANSPOSE8X8(
int16x8_t *q8s16, int16x8_t *q8s16,
int16x8_t *q9s16, int16x8_t *q9s16,
int16x8_t *q10s16, int16x8_t *q10s16,
@ -87,7 +89,7 @@ static inline void TRANSPOSE8X8(
return; return;
} }
static inline void IDCT8x8_1D( static INLINE void IDCT8x8_1D(
int16x8_t *q8s16, int16x8_t *q8s16,
int16x8_t *q9s16, int16x8_t *q9s16,
int16x8_t *q10s16, int16x8_t *q10s16,

View File

@ -12,6 +12,7 @@
#include <assert.h> #include <assert.h>
#include "./vp9_rtcd.h" #include "./vp9_rtcd.h"
#include "./vpx_config.h"
#include "vp9/common/vp9_common.h" #include "vp9/common/vp9_common.h"
static int16_t sinpi_1_9 = 0x14a3; static int16_t sinpi_1_9 = 0x14a3;
@ -22,7 +23,7 @@ static int16_t cospi_8_64 = 0x3b21;
static int16_t cospi_16_64 = 0x2d41; static int16_t cospi_16_64 = 0x2d41;
static int16_t cospi_24_64 = 0x187e; static int16_t cospi_24_64 = 0x187e;
static inline void TRANSPOSE4X4( static INLINE void TRANSPOSE4X4(
int16x8_t *q8s16, int16x8_t *q8s16,
int16x8_t *q9s16) { int16x8_t *q9s16) {
int32x4_t q8s32, q9s32; int32x4_t q8s32, q9s32;
@ -41,7 +42,7 @@ static inline void TRANSPOSE4X4(
return; return;
} }
static inline void GENERATE_COSINE_CONSTANTS( static INLINE void GENERATE_COSINE_CONSTANTS(
int16x4_t *d0s16, int16x4_t *d0s16,
int16x4_t *d1s16, int16x4_t *d1s16,
int16x4_t *d2s16) { int16x4_t *d2s16) {
@ -51,7 +52,7 @@ static inline void GENERATE_COSINE_CONSTANTS(
return; return;
} }
static inline void GENERATE_SINE_CONSTANTS( static INLINE void GENERATE_SINE_CONSTANTS(
int16x4_t *d3s16, int16x4_t *d3s16,
int16x4_t *d4s16, int16x4_t *d4s16,
int16x4_t *d5s16, int16x4_t *d5s16,
@ -63,7 +64,7 @@ static inline void GENERATE_SINE_CONSTANTS(
return; return;
} }
static inline void IDCT4x4_1D( static INLINE void IDCT4x4_1D(
int16x4_t *d0s16, int16x4_t *d0s16,
int16x4_t *d1s16, int16x4_t *d1s16,
int16x4_t *d2s16, int16x4_t *d2s16,
@ -103,7 +104,7 @@ static inline void IDCT4x4_1D(
return; return;
} }
static inline void IADST4x4_1D( static INLINE void IADST4x4_1D(
int16x4_t *d3s16, int16x4_t *d3s16,
int16x4_t *d4s16, int16x4_t *d4s16,
int16x4_t *d5s16, int16x4_t *d5s16,

View File

@ -12,6 +12,7 @@
#include <assert.h> #include <assert.h>
#include "./vp9_rtcd.h" #include "./vp9_rtcd.h"
#include "./vpx_config.h"
#include "vp9/common/vp9_common.h" #include "vp9/common/vp9_common.h"
static int16_t cospi_2_64 = 16305; static int16_t cospi_2_64 = 16305;
@ -30,7 +31,7 @@ static int16_t cospi_26_64 = 4756;
static int16_t cospi_28_64 = 3196; static int16_t cospi_28_64 = 3196;
static int16_t cospi_30_64 = 1606; static int16_t cospi_30_64 = 1606;
static inline void TRANSPOSE8X8( static INLINE void TRANSPOSE8X8(
int16x8_t *q8s16, int16x8_t *q8s16,
int16x8_t *q9s16, int16x8_t *q9s16,
int16x8_t *q10s16, int16x8_t *q10s16,
@ -99,7 +100,7 @@ static inline void TRANSPOSE8X8(
return; return;
} }
static inline void IDCT8x8_1D( static INLINE void IDCT8x8_1D(
int16x8_t *q8s16, int16x8_t *q8s16,
int16x8_t *q9s16, int16x8_t *q9s16,
int16x8_t *q10s16, int16x8_t *q10s16,
@ -255,7 +256,7 @@ static inline void IDCT8x8_1D(
return; return;
} }
static inline void IADST8X8_1D( static INLINE void IADST8X8_1D(
int16x8_t *q8s16, int16x8_t *q8s16,
int16x8_t *q9s16, int16x8_t *q9s16,
int16x8_t *q10s16, int16x8_t *q10s16,

View File

@ -10,7 +10,9 @@
#include <arm_neon.h> #include <arm_neon.h>
static inline void vp9_loop_filter_neon( #include "./vpx_config.h"
static INLINE void vp9_loop_filter_neon(
uint8x8_t dblimit, // flimit uint8x8_t dblimit, // flimit
uint8x8_t dlimit, // limit uint8x8_t dlimit, // limit
uint8x8_t dthresh, // thresh uint8x8_t dthresh, // thresh
@ -271,7 +273,7 @@ void vp9_lpf_vertical_4_neon(
return; return;
} }
static inline void vp9_mbloop_filter_neon( static INLINE void vp9_mbloop_filter_neon(
uint8x8_t dblimit, // mblimit uint8x8_t dblimit, // mblimit
uint8x8_t dlimit, // limit uint8x8_t dlimit, // limit
uint8x8_t dthresh, // thresh uint8x8_t dthresh, // thresh