Giving consistent names to IDCT 16x16 functions.
Renames:
  vp9_short_idct16x16_add    -> vp9_idct16x16_256_add
  vp9_short_idct16x16_10_add -> vp9_idct16x16_10_add
  vp9_short_idct16x16_1_add  -> vp9_idct16x16_1_add
  vp9_idct_add_16x16         -> vp9_idct16x16_add

Change-Id: Ief8a3904de78deab0f4ede944c4d0339c228cfc3
This commit is contained in:
parent
2ae93a776b
commit
b096c5a336
@ -21,7 +21,7 @@
|
||||
extern "C" {
|
||||
#include "vp9/common/vp9_entropy.h"
|
||||
#include "./vp9_rtcd.h"
|
||||
void vp9_short_idct16x16_add_c(int16_t *input, uint8_t *output, int pitch);
|
||||
void vp9_idct16x16_256_add_c(int16_t *input, uint8_t *output, int pitch);
|
||||
}
|
||||
#include "vpx/vpx_integer.h"
|
||||
|
||||
@ -496,7 +496,7 @@ using std::tr1::make_tuple;
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
C, Trans16x16DCT,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_short_fdct16x16_c, &vp9_short_idct16x16_add_c, 0)));
|
||||
make_tuple(&vp9_short_fdct16x16_c, &vp9_idct16x16_256_add_c, 0)));
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
C, Trans16x16HT,
|
||||
::testing::Values(
|
||||
@ -510,7 +510,7 @@ INSTANTIATE_TEST_CASE_P(
|
||||
SSE2, Trans16x16DCT,
|
||||
::testing::Values(
|
||||
make_tuple(&vp9_short_fdct16x16_sse2,
|
||||
&vp9_short_idct16x16_add_sse2, 0)));
|
||||
&vp9_idct16x16_256_add_sse2, 0)));
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
SSE2, Trans16x16HT,
|
||||
::testing::Values(
|
||||
|
@ -11,19 +11,19 @@
|
||||
#include "./vp9_rtcd.h"
|
||||
#include "vp9/common/vp9_common.h"
|
||||
|
||||
extern void vp9_short_idct16x16_add_neon_pass1(int16_t *input,
|
||||
extern void vp9_idct16x16_256_add_neon_pass1(int16_t *input,
|
||||
int16_t *output,
|
||||
int output_stride);
|
||||
extern void vp9_short_idct16x16_add_neon_pass2(int16_t *src,
|
||||
extern void vp9_idct16x16_256_add_neon_pass2(int16_t *src,
|
||||
int16_t *output,
|
||||
int16_t *pass1Output,
|
||||
int16_t skip_adding,
|
||||
uint8_t *dest,
|
||||
int dest_stride);
|
||||
extern void vp9_short_idct16x16_10_add_neon_pass1(int16_t *input,
|
||||
extern void vp9_idct16x16_10_add_neon_pass1(int16_t *input,
|
||||
int16_t *output,
|
||||
int output_stride);
|
||||
extern void vp9_short_idct16x16_10_add_neon_pass2(int16_t *src,
|
||||
extern void vp9_idct16x16_10_add_neon_pass2(int16_t *src,
|
||||
int16_t *output,
|
||||
int16_t *pass1Output,
|
||||
int16_t skip_adding,
|
||||
@ -34,7 +34,7 @@ extern void vp9_short_idct16x16_10_add_neon_pass2(int16_t *src,
|
||||
extern void vp9_push_neon(int64_t *store);
|
||||
extern void vp9_pop_neon(int64_t *store);
|
||||
|
||||
void vp9_short_idct16x16_add_neon(int16_t *input,
|
||||
void vp9_idct16x16_256_add_neon(int16_t *input,
|
||||
uint8_t *dest, int dest_stride) {
|
||||
int64_t store_reg[8];
|
||||
int16_t pass1_output[16*16] = {0};
|
||||
@ -46,12 +46,12 @@ void vp9_short_idct16x16_add_neon(int16_t *input,
|
||||
/* Parallel idct on the upper 8 rows */
|
||||
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
|
||||
// stage 6 result in pass1_output.
|
||||
vp9_short_idct16x16_add_neon_pass1(input, pass1_output, 8);
|
||||
vp9_idct16x16_256_add_neon_pass1(input, pass1_output, 8);
|
||||
|
||||
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
|
||||
// with result in pass1(pass1_output) to calculate final result in stage 7
|
||||
// which will be saved into row_idct_output.
|
||||
vp9_short_idct16x16_add_neon_pass2(input+1,
|
||||
vp9_idct16x16_256_add_neon_pass2(input+1,
|
||||
row_idct_output,
|
||||
pass1_output,
|
||||
0,
|
||||
@ -61,12 +61,12 @@ void vp9_short_idct16x16_add_neon(int16_t *input,
|
||||
/* Parallel idct on the lower 8 rows */
|
||||
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
|
||||
// stage 6 result in pass1_output.
|
||||
vp9_short_idct16x16_add_neon_pass1(input+8*16, pass1_output, 8);
|
||||
vp9_idct16x16_256_add_neon_pass1(input+8*16, pass1_output, 8);
|
||||
|
||||
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
|
||||
// with result in pass1(pass1_output) to calculate final result in stage 7
|
||||
// which will be saved into row_idct_output.
|
||||
vp9_short_idct16x16_add_neon_pass2(input+8*16+1,
|
||||
vp9_idct16x16_256_add_neon_pass2(input+8*16+1,
|
||||
row_idct_output+8,
|
||||
pass1_output,
|
||||
0,
|
||||
@ -76,12 +76,12 @@ void vp9_short_idct16x16_add_neon(int16_t *input,
|
||||
/* Parallel idct on the left 8 columns */
|
||||
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
|
||||
// stage 6 result in pass1_output.
|
||||
vp9_short_idct16x16_add_neon_pass1(row_idct_output, pass1_output, 8);
|
||||
vp9_idct16x16_256_add_neon_pass1(row_idct_output, pass1_output, 8);
|
||||
|
||||
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
|
||||
// with result in pass1(pass1_output) to calculate final result in stage 7.
|
||||
// Then add the result to the destination data.
|
||||
vp9_short_idct16x16_add_neon_pass2(row_idct_output+1,
|
||||
vp9_idct16x16_256_add_neon_pass2(row_idct_output+1,
|
||||
row_idct_output,
|
||||
pass1_output,
|
||||
1,
|
||||
@ -91,12 +91,12 @@ void vp9_short_idct16x16_add_neon(int16_t *input,
|
||||
/* Parallel idct on the right 8 columns */
|
||||
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
|
||||
// stage 6 result in pass1_output.
|
||||
vp9_short_idct16x16_add_neon_pass1(row_idct_output+8*16, pass1_output, 8);
|
||||
vp9_idct16x16_256_add_neon_pass1(row_idct_output+8*16, pass1_output, 8);
|
||||
|
||||
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
|
||||
// with result in pass1(pass1_output) to calculate final result in stage 7.
|
||||
// Then add the result to the destination data.
|
||||
vp9_short_idct16x16_add_neon_pass2(row_idct_output+8*16+1,
|
||||
vp9_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1,
|
||||
row_idct_output+8,
|
||||
pass1_output,
|
||||
1,
|
||||
@ -109,7 +109,7 @@ void vp9_short_idct16x16_add_neon(int16_t *input,
|
||||
return;
|
||||
}
|
||||
|
||||
void vp9_short_idct16x16_10_add_neon(int16_t *input,
|
||||
void vp9_idct16x16_10_add_neon(int16_t *input,
|
||||
uint8_t *dest, int dest_stride) {
|
||||
int64_t store_reg[8];
|
||||
int16_t pass1_output[16*16] = {0};
|
||||
@ -121,12 +121,12 @@ void vp9_short_idct16x16_10_add_neon(int16_t *input,
|
||||
/* Parallel idct on the upper 8 rows */
|
||||
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
|
||||
// stage 6 result in pass1_output.
|
||||
vp9_short_idct16x16_10_add_neon_pass1(input, pass1_output, 8);
|
||||
vp9_idct16x16_10_add_neon_pass1(input, pass1_output, 8);
|
||||
|
||||
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
|
||||
// with result in pass1(pass1_output) to calculate final result in stage 7
|
||||
// which will be saved into row_idct_output.
|
||||
vp9_short_idct16x16_10_add_neon_pass2(input+1,
|
||||
vp9_idct16x16_10_add_neon_pass2(input+1,
|
||||
row_idct_output,
|
||||
pass1_output,
|
||||
0,
|
||||
@ -138,12 +138,12 @@ void vp9_short_idct16x16_10_add_neon(int16_t *input,
|
||||
/* Parallel idct on the left 8 columns */
|
||||
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
|
||||
// stage 6 result in pass1_output.
|
||||
vp9_short_idct16x16_add_neon_pass1(row_idct_output, pass1_output, 8);
|
||||
vp9_idct16x16_256_add_neon_pass1(row_idct_output, pass1_output, 8);
|
||||
|
||||
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
|
||||
// with result in pass1(pass1_output) to calculate final result in stage 7.
|
||||
// Then add the result to the destination data.
|
||||
vp9_short_idct16x16_add_neon_pass2(row_idct_output+1,
|
||||
vp9_idct16x16_256_add_neon_pass2(row_idct_output+1,
|
||||
row_idct_output,
|
||||
pass1_output,
|
||||
1,
|
||||
@ -153,12 +153,12 @@ void vp9_short_idct16x16_10_add_neon(int16_t *input,
|
||||
/* Parallel idct on the right 8 columns */
|
||||
// First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the
|
||||
// stage 6 result in pass1_output.
|
||||
vp9_short_idct16x16_add_neon_pass1(row_idct_output+8*16, pass1_output, 8);
|
||||
vp9_idct16x16_256_add_neon_pass1(row_idct_output+8*16, pass1_output, 8);
|
||||
|
||||
// Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines
|
||||
// with result in pass1(pass1_output) to calculate final result in stage 7.
|
||||
// Then add the result to the destination data.
|
||||
vp9_short_idct16x16_add_neon_pass2(row_idct_output+8*16+1,
|
||||
vp9_idct16x16_256_add_neon_pass2(row_idct_output+8*16+1,
|
||||
row_idct_output+8,
|
||||
pass1_output,
|
||||
1,
|
||||
|
@ -8,21 +8,21 @@
|
||||
;
|
||||
|
||||
|
||||
EXPORT |vp9_short_idct16x16_1_add_neon|
|
||||
EXPORT |vp9_idct16x16_1_add_neon|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
|
||||
AREA ||.text||, CODE, READONLY, ALIGN=2
|
||||
|
||||
;void vp9_short_idct16x16_1_add_neon(int16_t *input, uint8_t *dest,
|
||||
;void vp9_idct16x16_1_add_neon(int16_t *input, uint8_t *dest,
|
||||
; int dest_stride)
|
||||
;
|
||||
; r0 int16_t input
|
||||
; r1 uint8_t *dest
|
||||
; r2 int dest_stride)
|
||||
|
||||
|vp9_short_idct16x16_1_add_neon| PROC
|
||||
|vp9_idct16x16_1_add_neon| PROC
|
||||
ldrsh r0, [r0]
|
||||
|
||||
; generate cospi_16_64 = 11585
|
||||
@ -193,6 +193,6 @@
|
||||
vst1.64 {d31}, [r12], r2
|
||||
|
||||
bx lr
|
||||
ENDP ; |vp9_short_idct16x16_1_add_neon|
|
||||
ENDP ; |vp9_idct16x16_1_add_neon|
|
||||
|
||||
END
|
||||
|
@ -8,10 +8,10 @@
|
||||
; be found in the AUTHORS file in the root of the source tree.
|
||||
;
|
||||
|
||||
EXPORT |vp9_short_idct16x16_add_neon_pass1|
|
||||
EXPORT |vp9_short_idct16x16_add_neon_pass2|
|
||||
EXPORT |vp9_short_idct16x16_10_add_neon_pass1|
|
||||
EXPORT |vp9_short_idct16x16_10_add_neon_pass2|
|
||||
EXPORT |vp9_idct16x16_256_add_neon_pass1|
|
||||
EXPORT |vp9_idct16x16_256_add_neon_pass2|
|
||||
EXPORT |vp9_idct16x16_10_add_neon_pass1|
|
||||
EXPORT |vp9_idct16x16_10_add_neon_pass2|
|
||||
ARM
|
||||
REQUIRE8
|
||||
PRESERVE8
|
||||
@ -36,7 +36,7 @@
|
||||
MEND
|
||||
|
||||
AREA Block, CODE, READONLY ; name this block of code
|
||||
;void |vp9_short_idct16x16_add_neon_pass1|(int16_t *input,
|
||||
;void |vp9_idct16x16_256_add_neon_pass1|(int16_t *input,
|
||||
; int16_t *output, int output_stride)
|
||||
;
|
||||
; r0 int16_t input
|
||||
@ -46,7 +46,7 @@
|
||||
; idct16 stage1 - stage6 on all the elements loaded in q8-q15. The output
|
||||
; will be stored back into q8-q15 registers. This function will touch q0-q7
|
||||
; registers and use them as buffer during calculation.
|
||||
|vp9_short_idct16x16_add_neon_pass1| PROC
|
||||
|vp9_idct16x16_256_add_neon_pass1| PROC
|
||||
|
||||
; TODO(hkuang): Find a better way to load the elements.
|
||||
; load elements of 0, 2, 4, 6, 8, 10, 12, 14 into q8 - q15
|
||||
@ -273,9 +273,9 @@
|
||||
vst1.64 {d31}, [r1], r2
|
||||
|
||||
bx lr
|
||||
ENDP ; |vp9_short_idct16x16_add_neon_pass1|
|
||||
ENDP ; |vp9_idct16x16_256_add_neon_pass1|
|
||||
|
||||
;void vp9_short_idct16x16_add_neon_pass2(int16_t *src,
|
||||
;void vp9_idct16x16_256_add_neon_pass2(int16_t *src,
|
||||
; int16_t *output,
|
||||
; int16_t *pass1Output,
|
||||
; int16_t skip_adding,
|
||||
@ -292,7 +292,7 @@
|
||||
; idct16 stage1 - stage7 on all the elements loaded in q8-q15. The output
|
||||
; will be stored back into q8-q15 registers. This function will touch q0-q7
|
||||
; registers and use them as buffer during calculation.
|
||||
|vp9_short_idct16x16_add_neon_pass2| PROC
|
||||
|vp9_idct16x16_256_add_neon_pass2| PROC
|
||||
push {r3-r9}
|
||||
|
||||
; TODO(hkuang): Find a better way to load the elements.
|
||||
@ -784,9 +784,9 @@ skip_adding_dest
|
||||
end_idct16x16_pass2
|
||||
pop {r3-r9}
|
||||
bx lr
|
||||
ENDP ; |vp9_short_idct16x16_add_neon_pass2|
|
||||
ENDP ; |vp9_idct16x16_256_add_neon_pass2|
|
||||
|
||||
;void |vp9_short_idct16x16_10_add_neon_pass1|(int16_t *input,
|
||||
;void |vp9_idct16x16_10_add_neon_pass1|(int16_t *input,
|
||||
; int16_t *output, int output_stride)
|
||||
;
|
||||
; r0 int16_t input
|
||||
@ -796,7 +796,7 @@ end_idct16x16_pass2
|
||||
; idct16 stage1 - stage6 on all the elements loaded in q8-q15. The output
|
||||
; will be stored back into q8-q15 registers. This function will touch q0-q7
|
||||
; registers and use them as buffer during calculation.
|
||||
|vp9_short_idct16x16_10_add_neon_pass1| PROC
|
||||
|vp9_idct16x16_10_add_neon_pass1| PROC
|
||||
|
||||
; TODO(hkuang): Find a better way to load the elements.
|
||||
; load elements of 0, 2, 4, 6, 8, 10, 12, 14 into q8 - q15
|
||||
@ -905,9 +905,9 @@ end_idct16x16_pass2
|
||||
vst1.64 {d31}, [r1], r2
|
||||
|
||||
bx lr
|
||||
ENDP ; |vp9_short_idct16x16_10_add_neon_pass1|
|
||||
ENDP ; |vp9_idct16x16_10_add_neon_pass1|
|
||||
|
||||
;void vp9_short_idct16x16_10_add_neon_pass2(int16_t *src,
|
||||
;void vp9_idct16x16_10_add_neon_pass2(int16_t *src,
|
||||
; int16_t *output,
|
||||
; int16_t *pass1Output,
|
||||
; int16_t skip_adding,
|
||||
@ -924,7 +924,7 @@ end_idct16x16_pass2
|
||||
; idct16 stage1 - stage7 on all the elements loaded in q8-q15. The output
|
||||
; will be stored back into q8-q15 registers. This function will touch q0-q7
|
||||
; registers and use them as buffer during calculation.
|
||||
|vp9_short_idct16x16_10_add_neon_pass2| PROC
|
||||
|vp9_idct16x16_10_add_neon_pass2| PROC
|
||||
push {r3-r9}
|
||||
|
||||
; TODO(hkuang): Find a better way to load the elements.
|
||||
@ -1175,5 +1175,5 @@ end_idct16x16_pass2
|
||||
end_idct10_16x16_pass2
|
||||
pop {r3-r9}
|
||||
bx lr
|
||||
ENDP ; |vp9_short_idct16x16_10_add_neon_pass2|
|
||||
ENDP ; |vp9_idct16x16_10_add_neon_pass2|
|
||||
END
|
||||
|
@ -611,7 +611,7 @@ static void idct16_1d(int16_t *input, int16_t *output) {
|
||||
output[15] = step2[0] - step2[15];
|
||||
}
|
||||
|
||||
void vp9_short_idct16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
|
||||
void vp9_idct16x16_256_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
|
||||
int16_t out[16 * 16];
|
||||
int16_t *outptr = out;
|
||||
int i, j;
|
||||
@ -838,7 +838,7 @@ void vp9_short_iht16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride,
|
||||
+ dest[j * dest_stride + i]); }
|
||||
}
|
||||
|
||||
void vp9_short_idct16x16_10_add_c(int16_t *input, uint8_t *dest,
|
||||
void vp9_idct16x16_10_add_c(int16_t *input, uint8_t *dest,
|
||||
int dest_stride) {
|
||||
int16_t out[16 * 16] = { 0 };
|
||||
int16_t *outptr = out;
|
||||
@ -864,7 +864,7 @@ void vp9_short_idct16x16_10_add_c(int16_t *input, uint8_t *dest,
|
||||
}
|
||||
}
|
||||
|
||||
void vp9_short_idct16x16_1_add_c(int16_t *input, uint8_t *dest,
|
||||
void vp9_idct16x16_1_add_c(int16_t *input, uint8_t *dest,
|
||||
int dest_stride) {
|
||||
int i, j;
|
||||
int a1;
|
||||
@ -1320,17 +1320,17 @@ void vp9_idct8x8_add(int16_t *input, uint8_t *dest, int stride, int eob) {
|
||||
}
|
||||
}
|
||||
|
||||
void vp9_idct_add_16x16(int16_t *input, uint8_t *dest, int stride, int eob) {
|
||||
void vp9_idct16x16_add(int16_t *input, uint8_t *dest, int stride, int eob) {
|
||||
/* The calculation can be simplified if there are not many non-zero dct
|
||||
* coefficients. Use eobs to separate different cases. */
|
||||
if (eob) {
|
||||
if (eob == 1)
|
||||
/* DC only DCT coefficient. */
|
||||
vp9_short_idct16x16_1_add(input, dest, stride);
|
||||
vp9_idct16x16_1_add(input, dest, stride);
|
||||
else if (eob <= 10)
|
||||
vp9_short_idct16x16_10_add(input, dest, stride);
|
||||
vp9_idct16x16_10_add(input, dest, stride);
|
||||
else
|
||||
vp9_short_idct16x16_add(input, dest, stride);
|
||||
vp9_idct16x16_256_add(input, dest, stride);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1366,7 +1366,7 @@ void vp9_iht_add_8x8(TX_TYPE tx_type, int16_t *input, uint8_t *dest,
|
||||
void vp9_iht_add_16x16(TX_TYPE tx_type, int16_t *input, uint8_t *dest,
|
||||
int stride, int eob) {
|
||||
if (tx_type == DCT_DCT) {
|
||||
vp9_idct_add_16x16(input, dest, stride, eob);
|
||||
vp9_idct16x16_add(input, dest, stride, eob);
|
||||
} else {
|
||||
if (eob > 0) {
|
||||
vp9_short_iht16x16_add(input, dest, stride, tx_type);
|
||||
|
@ -91,7 +91,7 @@ typedef struct {
|
||||
void vp9_idct4x4_add(int16_t *input, uint8_t *dest, int stride, int eob);
|
||||
void vp9_iwht4x4_add(int16_t *input, uint8_t *dest, int stride, int eob);
|
||||
void vp9_idct8x8_add(int16_t *input, uint8_t *dest, int stride, int eob);
|
||||
void vp9_idct_add_16x16(int16_t *input, uint8_t *dest, int stride, int eob);
|
||||
void vp9_idct16x16_add(int16_t *input, uint8_t *dest, int stride, int eob);
|
||||
void vp9_idct_add_32x32(int16_t *input, uint8_t *dest, int stride, int eob);
|
||||
|
||||
void vp9_iht_add(TX_TYPE tx_type, int16_t *input, uint8_t *dest,
|
||||
|
@ -282,14 +282,14 @@ specialize vp9_idct8x8_64_add sse2 neon
|
||||
prototype void vp9_idct8x8_10_add "int16_t *input, uint8_t *dest, int dest_stride"
|
||||
specialize vp9_idct8x8_10_add sse2 neon
|
||||
|
||||
prototype void vp9_short_idct16x16_1_add "int16_t *input, uint8_t *dest, int dest_stride"
|
||||
specialize vp9_short_idct16x16_1_add sse2 neon
|
||||
prototype void vp9_idct16x16_1_add "int16_t *input, uint8_t *dest, int dest_stride"
|
||||
specialize vp9_idct16x16_1_add sse2 neon
|
||||
|
||||
prototype void vp9_short_idct16x16_add "int16_t *input, uint8_t *dest, int dest_stride"
|
||||
specialize vp9_short_idct16x16_add sse2 neon
|
||||
prototype void vp9_idct16x16_256_add "int16_t *input, uint8_t *dest, int dest_stride"
|
||||
specialize vp9_idct16x16_256_add sse2 neon
|
||||
|
||||
prototype void vp9_short_idct16x16_10_add "int16_t *input, uint8_t *dest, int dest_stride"
|
||||
specialize vp9_short_idct16x16_10_add sse2 neon
|
||||
prototype void vp9_idct16x16_10_add "int16_t *input, uint8_t *dest, int dest_stride"
|
||||
specialize vp9_idct16x16_10_add sse2 neon
|
||||
|
||||
prototype void vp9_short_idct32x32_add "int16_t *input, uint8_t *dest, int dest_stride"
|
||||
specialize vp9_short_idct32x32_add sse2 neon
|
||||
|
@ -1263,7 +1263,7 @@ void vp9_idct8x8_10_add_sse2(int16_t *input, uint8_t *dest, int stride) {
|
||||
stp2_10, stp2_13, stp2_11, stp2_12) \
|
||||
}
|
||||
|
||||
void vp9_short_idct16x16_add_sse2(int16_t *input, uint8_t *dest, int stride) {
|
||||
void vp9_idct16x16_256_add_sse2(int16_t *input, uint8_t *dest, int stride) {
|
||||
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
|
||||
const __m128i final_rounding = _mm_set1_epi16(1<<5);
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
@ -1470,7 +1470,7 @@ void vp9_short_idct16x16_add_sse2(int16_t *input, uint8_t *dest, int stride) {
|
||||
}
|
||||
}
|
||||
|
||||
void vp9_short_idct16x16_1_add_sse2(int16_t *input, uint8_t *dest, int stride) {
|
||||
void vp9_idct16x16_1_add_sse2(int16_t *input, uint8_t *dest, int stride) {
|
||||
__m128i dc_value;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
int a, i;
|
||||
@ -2456,7 +2456,7 @@ void vp9_short_iht16x16_add_sse2(int16_t *input, uint8_t *dest, int stride,
|
||||
write_buffer_8x16(dest, in1, stride);
|
||||
}
|
||||
|
||||
void vp9_short_idct16x16_10_add_sse2(int16_t *input, uint8_t *dest,
|
||||
void vp9_idct16x16_10_add_sse2(int16_t *input, uint8_t *dest,
|
||||
int stride) {
|
||||
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
|
||||
const __m128i final_rounding = _mm_set1_epi16(1<<5);
|
||||
|
@ -457,7 +457,7 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize,
|
||||
vp9_short_idct32x32_add(dqcoeff, dst, pd->dst.stride);
|
||||
break;
|
||||
case TX_16X16:
|
||||
vp9_idct_add_16x16(dqcoeff, dst, pd->dst.stride, pd->eobs[block]);
|
||||
vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]);
|
||||
break;
|
||||
case TX_8X8:
|
||||
vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]);
|
||||
|
Loading…
Reference in New Issue
Block a user