Merge "Giving consistent names to IDCT/IWHT functions."
This commit is contained in:
commit
9dba044be2
@ -31,7 +31,7 @@ void fdct4x4(int16_t *in, int16_t *out, uint8_t* /*dst*/,
|
|||||||
}
|
}
|
||||||
void idct4x4_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
|
void idct4x4_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
|
||||||
int stride, int /*tx_type*/) {
|
int stride, int /*tx_type*/) {
|
||||||
vp9_short_idct4x4_add_c(out, dst, stride >> 1);
|
vp9_idct4x4_16_add_c(out, dst, stride >> 1);
|
||||||
}
|
}
|
||||||
void fht4x4(int16_t *in, int16_t *out, uint8_t* /*dst*/,
|
void fht4x4(int16_t *in, int16_t *out, uint8_t* /*dst*/,
|
||||||
int stride, int tx_type) {
|
int stride, int tx_type) {
|
||||||
|
@ -8,21 +8,21 @@
|
|||||||
;
|
;
|
||||||
|
|
||||||
|
|
||||||
EXPORT |vp9_short_idct4x4_1_add_neon|
|
EXPORT |vp9_idct4x4_1_add_neon|
|
||||||
ARM
|
ARM
|
||||||
REQUIRE8
|
REQUIRE8
|
||||||
PRESERVE8
|
PRESERVE8
|
||||||
|
|
||||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||||
|
|
||||||
;void vp9_short_idct4x4_1_add_neon(int16_t *input, uint8_t *dest,
|
;void vp9_idct4x4_1_add_neon(int16_t *input, uint8_t *dest,
|
||||||
; int dest_stride)
|
; int dest_stride)
|
||||||
;
|
;
|
||||||
; r0 int16_t input
|
; r0 int16_t input
|
||||||
; r1 uint8_t *dest
|
; r1 uint8_t *dest
|
||||||
; r2 int dest_stride)
|
; r2 int dest_stride)
|
||||||
|
|
||||||
|vp9_short_idct4x4_1_add_neon| PROC
|
|vp9_idct4x4_1_add_neon| PROC
|
||||||
ldrsh r0, [r0]
|
ldrsh r0, [r0]
|
||||||
|
|
||||||
; generate cospi_16_64 = 11585
|
; generate cospi_16_64 = 11585
|
||||||
@ -63,6 +63,6 @@
|
|||||||
vst1.32 {d7[1]}, [r12]
|
vst1.32 {d7[1]}, [r12]
|
||||||
|
|
||||||
bx lr
|
bx lr
|
||||||
ENDP ; |vp9_short_idct4x4_1_add_neon|
|
ENDP ; |vp9_idct4x4_1_add_neon|
|
||||||
|
|
||||||
END
|
END
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
; be found in the AUTHORS file in the root of the source tree.
|
; be found in the AUTHORS file in the root of the source tree.
|
||||||
;
|
;
|
||||||
|
|
||||||
EXPORT |vp9_short_idct4x4_add_neon|
|
EXPORT |vp9_idct4x4_16_add_neon|
|
||||||
ARM
|
ARM
|
||||||
REQUIRE8
|
REQUIRE8
|
||||||
PRESERVE8
|
PRESERVE8
|
||||||
@ -16,13 +16,13 @@
|
|||||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||||
|
|
||||||
AREA Block, CODE, READONLY ; name this block of code
|
AREA Block, CODE, READONLY ; name this block of code
|
||||||
;void vp9_short_idct4x4_add_neon(int16_t *input, uint8_t *dest, int dest_stride)
|
;void vp9_idct4x4_16_add_neon(int16_t *input, uint8_t *dest, int dest_stride)
|
||||||
;
|
;
|
||||||
; r0 int16_t input
|
; r0 int16_t input
|
||||||
; r1 uint8_t *dest
|
; r1 uint8_t *dest
|
||||||
; r2 int dest_stride)
|
; r2 int dest_stride)
|
||||||
|
|
||||||
|vp9_short_idct4x4_add_neon| PROC
|
|vp9_idct4x4_16_add_neon| PROC
|
||||||
|
|
||||||
; The 2D transform is done with two passes which are actually pretty
|
; The 2D transform is done with two passes which are actually pretty
|
||||||
; similar. We first transform the rows. This is done by transposing
|
; similar. We first transform the rows. This is done by transposing
|
||||||
@ -185,6 +185,6 @@
|
|||||||
vst1.32 {d26[1]}, [r1], r2
|
vst1.32 {d26[1]}, [r1], r2
|
||||||
vst1.32 {d26[0]}, [r1] ; no post-increment
|
vst1.32 {d26[0]}, [r1] ; no post-increment
|
||||||
bx lr
|
bx lr
|
||||||
ENDP ; |vp9_short_idct4x4_add_neon|
|
ENDP ; |vp9_idct4x4_16_add_neon|
|
||||||
|
|
||||||
END
|
END
|
||||||
|
@ -18,7 +18,7 @@
|
|||||||
#include "vp9/common/vp9_common.h"
|
#include "vp9/common/vp9_common.h"
|
||||||
#include "vp9/common/vp9_idct.h"
|
#include "vp9/common/vp9_idct.h"
|
||||||
|
|
||||||
void vp9_short_iwalsh4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
|
void vp9_iwht4x4_16_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
|
||||||
/* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,
|
/* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,
|
||||||
0.5 shifts per pixel. */
|
0.5 shifts per pixel. */
|
||||||
int i;
|
int i;
|
||||||
@ -70,7 +70,7 @@ void vp9_short_iwalsh4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void vp9_short_iwalsh4x4_1_add_c(int16_t *in, uint8_t *dest, int dest_stride) {
|
void vp9_iwht4x4_1_add_c(int16_t *in, uint8_t *dest, int dest_stride) {
|
||||||
int i;
|
int i;
|
||||||
int a1, e1;
|
int a1, e1;
|
||||||
int16_t tmp[4];
|
int16_t tmp[4];
|
||||||
@ -116,7 +116,7 @@ void vp9_idct4_1d_c(int16_t *input, int16_t *output) {
|
|||||||
output[3] = step[0] - step[3];
|
output[3] = step[0] - step[3];
|
||||||
}
|
}
|
||||||
|
|
||||||
void vp9_short_idct4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
|
void vp9_idct4x4_16_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
|
||||||
int16_t out[4 * 4];
|
int16_t out[4 * 4];
|
||||||
int16_t *outptr = out;
|
int16_t *outptr = out;
|
||||||
int i, j;
|
int i, j;
|
||||||
@ -140,7 +140,7 @@ void vp9_short_idct4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void vp9_short_idct4x4_1_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
|
void vp9_idct4x4_1_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
|
||||||
int i;
|
int i;
|
||||||
int a1;
|
int a1;
|
||||||
int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
|
int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
|
||||||
@ -1286,20 +1286,19 @@ void vp9_short_idct32x32_1_add_c(int16_t *input, uint8_t *dest,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// idct
|
// idct
|
||||||
void vp9_idct_add(int16_t *input, uint8_t *dest, int stride, int eob) {
|
void vp9_idct4x4_add(int16_t *input, uint8_t *dest, int stride, int eob) {
|
||||||
if (eob > 1)
|
if (eob > 1)
|
||||||
vp9_short_idct4x4_add(input, dest, stride);
|
vp9_idct4x4_16_add(input, dest, stride);
|
||||||
else
|
else
|
||||||
vp9_short_idct4x4_1_add(input, dest, stride);
|
vp9_idct4x4_1_add(input, dest, stride);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void vp9_idct_add_lossless(int16_t *input, uint8_t *dest, int stride,
|
void vp9_iwht4x4_add(int16_t *input, uint8_t *dest, int stride, int eob) {
|
||||||
int eob) {
|
|
||||||
if (eob > 1)
|
if (eob > 1)
|
||||||
vp9_short_iwalsh4x4_add(input, dest, stride);
|
vp9_iwht4x4_16_add(input, dest, stride);
|
||||||
else
|
else
|
||||||
vp9_short_iwalsh4x4_1_add_c(input, dest, stride);
|
vp9_iwht4x4_1_add(input, dest, stride);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vp9_idct_add_8x8(int16_t *input, uint8_t *dest, int stride, int eob) {
|
void vp9_idct_add_8x8(int16_t *input, uint8_t *dest, int stride, int eob) {
|
||||||
@ -1348,7 +1347,7 @@ void vp9_idct_add_32x32(int16_t *input, uint8_t *dest, int stride, int eob) {
|
|||||||
void vp9_iht_add(TX_TYPE tx_type, int16_t *input, uint8_t *dest, int stride,
|
void vp9_iht_add(TX_TYPE tx_type, int16_t *input, uint8_t *dest, int stride,
|
||||||
int eob) {
|
int eob) {
|
||||||
if (tx_type == DCT_DCT)
|
if (tx_type == DCT_DCT)
|
||||||
vp9_idct_add(input, dest, stride, eob);
|
vp9_idct4x4_add(input, dest, stride, eob);
|
||||||
else
|
else
|
||||||
vp9_short_iht4x4_add(input, dest, stride, tx_type);
|
vp9_short_iht4x4_add(input, dest, stride, tx_type);
|
||||||
}
|
}
|
||||||
|
@ -88,9 +88,8 @@ typedef struct {
|
|||||||
} transform_2d;
|
} transform_2d;
|
||||||
|
|
||||||
|
|
||||||
void vp9_idct_add(int16_t *input, uint8_t *dest, int stride, int eob);
|
void vp9_idct4x4_add(int16_t *input, uint8_t *dest, int stride, int eob);
|
||||||
void vp9_idct_add_lossless(int16_t *input, uint8_t *dest,
|
void vp9_iwht4x4_add(int16_t *input, uint8_t *dest, int stride, int eob);
|
||||||
int stride, int eob);
|
|
||||||
void vp9_idct_add_8x8(int16_t *input, uint8_t *dest, int stride, int eob);
|
void vp9_idct_add_8x8(int16_t *input, uint8_t *dest, int stride, int eob);
|
||||||
void vp9_idct_add_16x16(int16_t *input, uint8_t *dest, int stride, int eob);
|
void vp9_idct_add_16x16(int16_t *input, uint8_t *dest, int stride, int eob);
|
||||||
void vp9_idct_add_32x32(int16_t *input, uint8_t *dest, int stride, int eob);
|
void vp9_idct_add_32x32(int16_t *input, uint8_t *dest, int stride, int eob);
|
||||||
|
@ -267,11 +267,11 @@ specialize vp9_convolve8_avg_vert ssse3 neon dspr2
|
|||||||
#
|
#
|
||||||
# dct
|
# dct
|
||||||
#
|
#
|
||||||
prototype void vp9_short_idct4x4_1_add "int16_t *input, uint8_t *dest, int dest_stride"
|
prototype void vp9_idct4x4_1_add "int16_t *input, uint8_t *dest, int dest_stride"
|
||||||
specialize vp9_short_idct4x4_1_add sse2 neon
|
specialize vp9_idct4x4_1_add sse2 neon
|
||||||
|
|
||||||
prototype void vp9_short_idct4x4_add "int16_t *input, uint8_t *dest, int dest_stride"
|
prototype void vp9_idct4x4_16_add "int16_t *input, uint8_t *dest, int dest_stride"
|
||||||
specialize vp9_short_idct4x4_add sse2 neon
|
specialize vp9_idct4x4_16_add sse2 neon
|
||||||
|
|
||||||
prototype void vp9_short_idct8x8_1_add "int16_t *input, uint8_t *dest, int dest_stride"
|
prototype void vp9_short_idct8x8_1_add "int16_t *input, uint8_t *dest, int dest_stride"
|
||||||
specialize vp9_short_idct8x8_1_add sse2 neon
|
specialize vp9_short_idct8x8_1_add sse2 neon
|
||||||
@ -310,11 +310,11 @@ prototype void vp9_idct4_1d "int16_t *input, int16_t *output"
|
|||||||
specialize vp9_idct4_1d sse2
|
specialize vp9_idct4_1d sse2
|
||||||
# dct and add
|
# dct and add
|
||||||
|
|
||||||
prototype void vp9_short_iwalsh4x4_1_add "int16_t *input, uint8_t *dest, int dest_stride"
|
prototype void vp9_iwht4x4_1_add "int16_t *input, uint8_t *dest, int dest_stride"
|
||||||
specialize vp9_short_iwalsh4x4_1_add
|
specialize vp9_iwht4x4_1_add
|
||||||
|
|
||||||
prototype void vp9_short_iwalsh4x4_add "int16_t *input, uint8_t *dest, int dest_stride"
|
prototype void vp9_iwht4x4_16_add "int16_t *input, uint8_t *dest, int dest_stride"
|
||||||
specialize vp9_short_iwalsh4x4_add
|
specialize vp9_iwht4x4_16_add
|
||||||
|
|
||||||
#
|
#
|
||||||
# Encoder functions below this point.
|
# Encoder functions below this point.
|
||||||
|
@ -15,7 +15,7 @@
|
|||||||
#include "vp9/common/vp9_common.h"
|
#include "vp9/common/vp9_common.h"
|
||||||
#include "vp9/common/vp9_idct.h"
|
#include "vp9/common/vp9_idct.h"
|
||||||
|
|
||||||
void vp9_short_idct4x4_add_sse2(int16_t *input, uint8_t *dest, int stride) {
|
void vp9_idct4x4_16_add_sse2(int16_t *input, uint8_t *dest, int stride) {
|
||||||
const __m128i zero = _mm_setzero_si128();
|
const __m128i zero = _mm_setzero_si128();
|
||||||
const __m128i eight = _mm_set1_epi16(8);
|
const __m128i eight = _mm_set1_epi16(8);
|
||||||
const __m128i cst = _mm_setr_epi16((int16_t)cospi_16_64, (int16_t)cospi_16_64,
|
const __m128i cst = _mm_setr_epi16((int16_t)cospi_16_64, (int16_t)cospi_16_64,
|
||||||
@ -148,7 +148,7 @@ void vp9_short_idct4x4_add_sse2(int16_t *input, uint8_t *dest, int stride) {
|
|||||||
RECON_AND_STORE4X4(dest, input3);
|
RECON_AND_STORE4X4(dest, input3);
|
||||||
}
|
}
|
||||||
|
|
||||||
void vp9_short_idct4x4_1_add_sse2(int16_t *input, uint8_t *dest, int stride) {
|
void vp9_idct4x4_1_add_sse2(int16_t *input, uint8_t *dest, int stride) {
|
||||||
__m128i dc_value;
|
__m128i dc_value;
|
||||||
const __m128i zero = _mm_setzero_si128();
|
const __m128i zero = _mm_setzero_si128();
|
||||||
int a;
|
int a;
|
||||||
|
@ -490,8 +490,7 @@ static void setup_quantization(VP9D_COMP *pbi, struct vp9_read_bit_buffer *rb) {
|
|||||||
cm->uv_dc_delta_q == 0 &&
|
cm->uv_dc_delta_q == 0 &&
|
||||||
cm->uv_ac_delta_q == 0;
|
cm->uv_ac_delta_q == 0;
|
||||||
|
|
||||||
xd->itxm_add = xd->lossless ? vp9_idct_add_lossless
|
xd->itxm_add = xd->lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
|
||||||
: vp9_idct_add;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static INTERPOLATIONFILTERTYPE read_interp_filter_type(
|
static INTERPOLATIONFILTERTYPE read_interp_filter_type(
|
||||||
|
@ -1866,8 +1866,8 @@ static void switch_lossless_mode(VP9_COMP *cpi, int lossless) {
|
|||||||
// printf("Switching to lossless\n");
|
// printf("Switching to lossless\n");
|
||||||
cpi->mb.fwd_txm8x4 = vp9_short_walsh8x4;
|
cpi->mb.fwd_txm8x4 = vp9_short_walsh8x4;
|
||||||
cpi->mb.fwd_txm4x4 = vp9_short_walsh4x4;
|
cpi->mb.fwd_txm4x4 = vp9_short_walsh4x4;
|
||||||
cpi->mb.e_mbd.inv_txm4x4_1_add = vp9_short_iwalsh4x4_1_add;
|
cpi->mb.e_mbd.inv_txm4x4_1_add = vp9_iwht4x4_1_add;
|
||||||
cpi->mb.e_mbd.inv_txm4x4_add = vp9_short_iwalsh4x4_add;
|
cpi->mb.e_mbd.inv_txm4x4_add = vp9_iwht4x4_16_add;
|
||||||
cpi->mb.optimize = 0;
|
cpi->mb.optimize = 0;
|
||||||
cpi->common.lf.filter_level = 0;
|
cpi->common.lf.filter_level = 0;
|
||||||
cpi->zbin_mode_boost_enabled = 0;
|
cpi->zbin_mode_boost_enabled = 0;
|
||||||
@ -1876,8 +1876,8 @@ static void switch_lossless_mode(VP9_COMP *cpi, int lossless) {
|
|||||||
// printf("Not lossless\n");
|
// printf("Not lossless\n");
|
||||||
cpi->mb.fwd_txm8x4 = vp9_short_fdct8x4;
|
cpi->mb.fwd_txm8x4 = vp9_short_fdct8x4;
|
||||||
cpi->mb.fwd_txm4x4 = vp9_short_fdct4x4;
|
cpi->mb.fwd_txm4x4 = vp9_short_fdct4x4;
|
||||||
cpi->mb.e_mbd.inv_txm4x4_1_add = vp9_short_idct4x4_1_add;
|
cpi->mb.e_mbd.inv_txm4x4_1_add = vp9_idct4x4_1_add;
|
||||||
cpi->mb.e_mbd.inv_txm4x4_add = vp9_short_idct4x4_add;
|
cpi->mb.e_mbd.inv_txm4x4_add = vp9_idct4x4_16_add;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1261,11 +1261,11 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) {
|
|||||||
|
|
||||||
cpi->oxcf.lossless = oxcf->lossless;
|
cpi->oxcf.lossless = oxcf->lossless;
|
||||||
if (cpi->oxcf.lossless) {
|
if (cpi->oxcf.lossless) {
|
||||||
cpi->mb.e_mbd.inv_txm4x4_1_add = vp9_short_iwalsh4x4_1_add;
|
cpi->mb.e_mbd.inv_txm4x4_1_add = vp9_iwht4x4_1_add;
|
||||||
cpi->mb.e_mbd.inv_txm4x4_add = vp9_short_iwalsh4x4_add;
|
cpi->mb.e_mbd.inv_txm4x4_add = vp9_iwht4x4_16_add;
|
||||||
} else {
|
} else {
|
||||||
cpi->mb.e_mbd.inv_txm4x4_1_add = vp9_short_idct4x4_1_add;
|
cpi->mb.e_mbd.inv_txm4x4_1_add = vp9_idct4x4_1_add;
|
||||||
cpi->mb.e_mbd.inv_txm4x4_add = vp9_short_idct4x4_add;
|
cpi->mb.e_mbd.inv_txm4x4_add = vp9_idct4x4_16_add;
|
||||||
}
|
}
|
||||||
|
|
||||||
cpi->baseline_gf_interval = DEFAULT_GF_INTERVAL;
|
cpi->baseline_gf_interval = DEFAULT_GF_INTERVAL;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user